GSdx: Format

This commit is contained in:
TellowKrinkle 2021-04-12 04:31:30 -05:00 committed by tellowkrinkle
parent fafbb3cc63
commit ae1bc651d6
128 changed files with 11631 additions and 9678 deletions

View File

@ -54,8 +54,8 @@ extern bool RunLinuxDialog();
#define PS2E_LT_GS 0x01
#define PS2E_GS_VERSION 0x0006
#define PS2E_X86 0x01 // 32 bit
#define PS2E_X86_64 0x02 // 64 bit
#define PS2E_X86 0x01 // 32 bit
#define PS2E_X86_64 0x02 // 64 bit
static GSRenderer* s_gs = NULL;
static void (*s_irq)() = NULL;
@ -100,7 +100,7 @@ EXPORT_C GSsetBaseMem(uint8* mem)
{
s_basemem = mem;
if(s_gs)
if (s_gs)
{
s_gs->SetRegsMem(s_basemem);
}
@ -113,7 +113,7 @@ EXPORT_C GSsetSettingsDir(const char* dir)
EXPORT_C_(int) GSinit()
{
if(!GSUtil::CheckSSE())
if (!GSUtil::CheckSSE())
{
return -1;
}
@ -148,7 +148,7 @@ EXPORT_C GSshutdown()
theApp.SetCurrentRendererType(GSRendererType::Undefined);
#ifdef _WIN32
if(SUCCEEDED(s_hr))
if (SUCCEEDED(s_hr))
{
::CoUninitialize();
@ -161,7 +161,8 @@ EXPORT_C GSclose()
{
gsopen_done = false;
if(s_gs == NULL) return;
if (s_gs == NULL)
return;
s_gs->ResetDevice();
@ -183,7 +184,7 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
bool old_api = *dsp == NULL;
// Fresh start up or config file changed
if(renderer == GSRendererType::Undefined)
if (renderer == GSRendererType::Undefined)
{
renderer = static_cast<GSRendererType>(theApp.GetConfigI("Renderer"));
#ifdef _WIN32
@ -192,7 +193,7 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
#endif
}
if(threads == -1)
if (threads == -1)
{
threads = theApp.GetConfigI("extrathreads");
}
@ -223,7 +224,8 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
#if defined(__unix__)
// Note: the EGL code uses GLX; otherwise it could maybe also be compatible with Windows
// Yes OpenGL code isn't complicated enough !
switch (GSWndEGL::SelectPlatform()) {
switch (GSWndEGL::SelectPlatform())
{
#if GS_EGL_X11
case EGL_PLATFORM_X11_KHR:
wnds.push_back(std::make_shared<GSWndEGL_X11>());
@ -257,12 +259,12 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
int w = theApp.GetConfigI("ModeWidth");
int h = theApp.GetConfigI("ModeHeight");
#if defined(__unix__)
void *win_handle = (void*)((uptr*)(dsp)+1);
void* win_handle = (void*)((uptr*)(dsp) + 1);
#else
void *win_handle = *dsp;
void* win_handle = *dsp;
#endif
for(auto& wnd : wnds)
for (auto& wnd : wnds)
{
try
{
@ -290,7 +292,7 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
}
}
if(!window)
if (!window)
{
GSclose();
@ -302,29 +304,29 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
switch (renderer)
{
default:
default:
#ifdef _WIN32
case GSRendererType::DX1011_HW:
dev = new GSDevice11();
s_renderer_name = "D3D11";
renderer_name = "Direct3D 11";
break;
case GSRendererType::DX1011_HW:
dev = new GSDevice11();
s_renderer_name = "D3D11";
renderer_name = "Direct3D 11";
break;
#endif
case GSRendererType::OGL_HW:
dev = new GSDeviceOGL();
s_renderer_name = "OGL";
renderer_name = "OpenGL";
break;
case GSRendererType::OGL_SW:
dev = new GSDeviceOGL();
s_renderer_name = "SW";
renderer_name = "Software";
break;
case GSRendererType::Null:
dev = new GSDeviceNull();
s_renderer_name = "NULL";
renderer_name = "Null";
break;
case GSRendererType::OGL_HW:
dev = new GSDeviceOGL();
s_renderer_name = "OGL";
renderer_name = "OpenGL";
break;
case GSRendererType::OGL_SW:
dev = new GSDeviceOGL();
s_renderer_name = "SW";
renderer_name = "Software";
break;
case GSRendererType::Null:
dev = new GSDeviceNull();
s_renderer_name = "NULL";
renderer_name = "Null";
break;
}
printf("Current Renderer: %s\n", renderer_name.c_str());
@ -338,21 +340,21 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
{
switch (renderer)
{
default:
default:
#ifdef _WIN32
case GSRendererType::DX1011_HW:
s_gs = (GSRenderer*)new GSRendererDX11();
break;
case GSRendererType::DX1011_HW:
s_gs = (GSRenderer*)new GSRendererDX11();
break;
#endif
case GSRendererType::OGL_HW:
s_gs = (GSRenderer*)new GSRendererOGL();
break;
case GSRendererType::OGL_SW:
s_gs = new GSRendererSW(threads);
break;
case GSRendererType::Null:
s_gs = new GSRendererNull();
break;
case GSRendererType::OGL_HW:
s_gs = (GSRenderer*)new GSRendererOGL();
break;
case GSRendererType::OGL_SW:
s_gs = new GSRendererSW(threads);
break;
case GSRendererType::Null:
s_gs = new GSRendererNull();
break;
}
if (s_gs == NULL)
return -1;
@ -375,10 +377,10 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
s_gs->SetIrqCallback(s_irq);
s_gs->SetVSync(s_vsync);
if(!old_api)
if (!old_api)
s_gs->SetMultithreaded(true);
if(!s_gs->CreateDevice(dev))
if (!s_gs->CreateDevice(dev))
{
// This probably means the user has DX11 configured with a video card that is only DX9
// compliant. Could mean driver issues of some sort also, but to be sure, that's the most
@ -389,7 +391,8 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
return -1;
}
if (renderer == GSRendererType::OGL_HW && theApp.GetConfigI("debug_glsl_shader") == 2) {
if (renderer == GSRendererType::OGL_HW && theApp.GetConfigI("debug_glsl_shader") == 2)
{
printf("GSdx: test OpenGL shader. Please wait...\n\n");
static_cast<GSDeviceOGL*>(s_gs->m_dev)->SelfShaderTest();
printf("\nGSdx: test OpenGL shader done. It will now exit\n");
@ -399,14 +402,16 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
return 0;
}
EXPORT_C_(void) GSosdLog(const char *utf8, uint32 color)
EXPORT_C_(void) GSosdLog(const char* utf8, uint32 color)
{
if(s_gs && s_gs->m_dev) s_gs->m_dev->m_osd.Log(utf8);
if (s_gs && s_gs->m_dev)
s_gs->m_dev->m_osd.Log(utf8);
}
EXPORT_C_(void) GSosdMonitor(const char *key, const char *value, uint32 color)
EXPORT_C_(void) GSosdMonitor(const char* key, const char* value, uint32 color)
{
if(s_gs && s_gs->m_dev) s_gs->m_dev->m_osd.Monitor(key, value);
if (s_gs && s_gs->m_dev)
s_gs->m_dev->m_osd.Monitor(key, value);
}
EXPORT_C_(int) GSopen2(void** dsp, uint32 flags)
@ -422,27 +427,25 @@ EXPORT_C_(int) GSopen2(void** dsp, uint32 flags)
// SW -> HW and HW -> SW (F9 Switch)
switch (current_renderer)
{
#ifdef _WIN32
#ifdef _WIN32
case GSRendererType::DX1011_HW:
current_renderer = GSRendererType::OGL_SW;
break;
#endif
#endif
case GSRendererType::OGL_SW:
#ifdef _WIN32
#ifdef _WIN32
{
const auto config_renderer = static_cast<GSRendererType>(
theApp.GetConfigI("Renderer")
);
const auto config_renderer = static_cast<GSRendererType>(theApp.GetConfigI("Renderer"));
if (current_renderer == config_renderer)
current_renderer = GSUtil::GetBestRenderer();
else
current_renderer = config_renderer;
}
#else
#else
current_renderer = GSRendererType::OGL_HW;
#endif
break;
#endif
break;
case GSRendererType::OGL_HW:
current_renderer = GSRendererType::OGL_SW;
break;
@ -456,7 +459,7 @@ EXPORT_C_(int) GSopen2(void** dsp, uint32 flags)
int retval = _GSopen(dsp, "", current_renderer);
if (s_gs != NULL)
s_gs->SetAspectRatio(0); // PCSX2 manages the aspect ratios
s_gs->SetAspectRatio(0); // PCSX2 manages the aspect ratios
gsopen_done = true;
@ -471,7 +474,7 @@ EXPORT_C_(int) GSopen(void** dsp, const char* title, int mt)
s_vsync = theApp.GetConfigI("vsync");
if(mt == 2)
if (mt == 2)
{
// pcsx2 sent a switch renderer request
mt = 1;
@ -487,7 +490,7 @@ EXPORT_C_(int) GSopen(void** dsp, const char* title, int mt)
int retval = _GSopen(dsp, title, renderer);
if(retval == 0 && s_gs)
if (retval == 0 && s_gs)
{
s_gs->SetMultithreaded(!!mt);
}
@ -642,13 +645,13 @@ EXPORT_C GSvsync(int field)
{
#ifdef _WIN32
if(s_gs->m_wnd->IsManaged())
if (s_gs->m_wnd->IsManaged())
{
MSG msg;
memset(&msg, 0, sizeof(msg));
while(msg.message != WM_QUIT && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
while (msg.message != WM_QUIT && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
{
TranslateMessage(&msg);
DispatchMessage(&msg);
@ -701,7 +704,7 @@ EXPORT_C GSkeyEvent(GSKeyEventData* e)
{
try
{
if(gsopen_done)
if (gsopen_done)
{
s_gs->KeyEvent(e);
}
@ -715,15 +718,15 @@ EXPORT_C_(int) GSfreeze(int mode, GSFreezeData* data)
{
try
{
if(mode == FREEZE_SAVE)
if (mode == FREEZE_SAVE)
{
return s_gs->Freeze(data, false);
}
else if(mode == FREEZE_SIZE)
else if (mode == FREEZE_SIZE)
{
return s_gs->Freeze(data, true);
}
else if(mode == FREEZE_LOAD)
else if (mode == FREEZE_LOAD)
{
return s_gs->Defrost(data);
}
@ -739,13 +742,14 @@ EXPORT_C GSconfigure()
{
try
{
if(!GSUtil::CheckSSE()) return;
if (!GSUtil::CheckSSE())
return;
theApp.Init();
#ifdef _WIN32
GSDialog::InitCommonControls();
if(GSSettingsDlg().DoModal() == IDOK)
if (GSSettingsDlg().DoModal() == IDOK)
{
// Force a reload of the gs state
theApp.SetCurrentRendererType(GSRendererType::Undefined);
@ -759,7 +763,8 @@ EXPORT_C GSconfigure()
// We can convince it that touching that pool would be unsafe by running all GTK calls within a CFRunLoop
// (Blocks submitted to the main queue by dispatch_async are run by its CFRunLoop)
dispatch_async(dispatch_get_main_queue(), ^{
if (RunLinuxDialog()) {
if (RunLinuxDialog())
{
theApp.ReloadConfig();
// Force a reload of the gs state
theApp.SetCurrentRendererType(GSRendererType::Undefined);
@ -767,22 +772,23 @@ EXPORT_C GSconfigure()
});
#else
if (RunLinuxDialog()) {
if (RunLinuxDialog())
{
theApp.ReloadConfig();
// Force a reload of the gs state
theApp.SetCurrentRendererType(GSRendererType::Undefined);
}
#endif
} catch (GSDXRecoverableError)
}
catch (GSDXRecoverableError)
{
}
}
EXPORT_C_(int) GStest()
{
if(!GSUtil::CheckSSE())
if (!GSUtil::CheckSSE())
return -1;
return 0;
@ -796,14 +802,15 @@ EXPORT_C GSirqCallback(void (*irq)())
{
s_irq = irq;
if(s_gs)
if (s_gs)
{
s_gs->SetIrqCallback(s_irq);
}
}
void pt(const char* str){
struct tm *current;
void pt(const char* str)
{
struct tm* current;
time_t now;
time(&now);
@ -814,12 +821,14 @@ void pt(const char* str){
EXPORT_C_(bool) GSsetupRecording(std::string& filename)
{
if (s_gs == NULL) {
if (s_gs == NULL)
{
printf("GSdx: no s_gs for recording\n");
return false;
}
#if defined(__unix__) || defined(__APPLE__)
if (!theApp.GetConfigB("capture_enabled")) {
if (!theApp.GetConfigB("capture_enabled"))
{
printf("GSdx: Recording is disabled\n");
return false;
}
@ -865,7 +874,7 @@ EXPORT_C GSgetTitleInfo2(char* dest, size_t length)
s.append(" | ").append(s_gs->m_GStitleInfoBuffer);
if(s.size() > length - 1)
if (s.size() > length - 1)
{
s = s.substr(0, length - 1);
}
@ -883,7 +892,7 @@ EXPORT_C GSsetVsync(int vsync)
{
s_vsync = vsync;
if(s_gs)
if (s_gs)
{
s_gs->SetVSync(s_vsync);
}
@ -893,7 +902,7 @@ EXPORT_C GSsetExclusive(int enabled)
{
s_exclusive = !!enabled;
if(s_gs)
if (s_gs)
{
s_gs->SetVSync(s_vsync);
}
@ -914,7 +923,8 @@ public:
: m_console(NULL)
, m_title(title)
{
if(open) Open();
if (open)
Open();
}
Console::~Console()
@ -924,7 +934,7 @@ public:
void Console::Open()
{
if(m_console == NULL)
if (m_console == NULL)
{
CONSOLE_SCREEN_BUFFER_INFO csbiInfo;
@ -962,7 +972,7 @@ public:
void Console::Close()
{
if(m_console != NULL)
if (m_console != NULL)
{
FreeConsole();
@ -983,10 +993,15 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
char* start = lpszCmdLine;
char* end = NULL;
long n = strtol(lpszCmdLine, &end, 10);
if(end > start) {renderer = static_cast<GSRendererType>(n); lpszCmdLine = end;}
if (end > start)
{
renderer = static_cast<GSRendererType>(n);
lpszCmdLine = end;
}
}
while(*lpszCmdLine == ' ') lpszCmdLine++;
while (*lpszCmdLine == ' ')
lpszCmdLine++;
::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
@ -1027,40 +1042,47 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
GSvsync(1);
struct Packet {uint8 type, param; uint32 size, addr; std::vector<uint8> buff;};
struct Packet
{
uint8 type, param;
uint32 size, addr;
std::vector<uint8> buff;
};
auto read_packet = [&file](uint8 type) {
Packet p;
p.type = type;
switch(p.type) {
case 0:
file->Read(&p.param, 1);
file->Read(&p.size, 4);
switch(p.param) {
switch (p.type)
{
case 0:
p.buff.resize(0x4000);
p.addr = 0x4000 - p.size;
file->Read(&p.buff[p.addr], p.size);
file->Read(&p.param, 1);
file->Read(&p.size, 4);
switch (p.param)
{
case 0:
p.buff.resize(0x4000);
p.addr = 0x4000 - p.size;
file->Read(&p.buff[p.addr], p.size);
break;
case 1:
case 2:
case 3:
p.buff.resize(p.size);
file->Read(p.buff.data(), p.size);
break;
}
break;
case 1:
case 2:
case 3:
p.buff.resize(p.size);
file->Read(p.buff.data(), p.size);
file->Read(&p.param, 1);
break;
case 2:
file->Read(&p.size, 4);
break;
case 3:
p.buff.resize(0x2000);
file->Read(p.buff.data(), 0x2000);
break;
}
break;
case 1:
file->Read(&p.param, 1);
break;
case 2:
file->Read(&p.size, 4);
break;
case 3:
p.buff.resize(0x2000);
file->Read(p.buff.data(), 0x2000);
break;
}
return p;
@ -1068,37 +1090,37 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
std::list<Packet> packets;
uint8 type;
while(file->Read(&type, 1))
while (file->Read(&type, 1))
packets.push_back(read_packet(type));
Sleep(100);
std::vector<uint8> buff;
while(IsWindowVisible(hWnd))
while (IsWindowVisible(hWnd))
{
for(auto &p : packets)
for (auto& p : packets)
{
switch(p.type)
switch (p.type)
{
case 0:
switch(p.param)
{
case 0: GSgifTransfer1(p.buff.data(), p.addr); break;
case 1: GSgifTransfer2(p.buff.data(), p.size / 16); break;
case 2: GSgifTransfer3(p.buff.data(), p.size / 16); break;
case 3: GSgifTransfer(p.buff.data(), p.size / 16); break;
}
break;
case 1:
GSvsync(p.param);
break;
case 2:
if(buff.size() < p.size) buff.resize(p.size);
GSreadFIFO2(p.buff.data(), p.size / 16);
break;
case 3:
memcpy(regs.data(), p.buff.data(), 0x2000);
break;
case 0:
switch(p.param)
{
case 0: GSgifTransfer1(p.buff.data(), p.addr); break;
case 1: GSgifTransfer2(p.buff.data(), p.size / 16); break;
case 2: GSgifTransfer3(p.buff.data(), p.size / 16); break;
case 3: GSgifTransfer(p.buff.data(), p.size / 16); break;
}
break;
case 1:
GSvsync(p.param);
break;
case 2:
if(buff.size() < p.size) buff.resize(p.size);
GSreadFIFO2(p.buff.data(), p.size / 16);
break;
case 3:
memcpy(regs.data(), p.buff.data(), 0x2000);
break;
}
}
}
@ -1115,7 +1137,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
Console console("GSdx", true);
if(1)
if (1)
{
GSLocalMemory* mem = new GSLocalMemory();
@ -1138,11 +1160,12 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
for (int i = 0; i < 1024 * 1024 * 4; i++)
ptr[i] = (uint8)i;
//
for(int tbw = 5; tbw <= 10; tbw++)
for (int tbw = 5; tbw <= 10; tbw++)
{
int n = 256 << ((10 - tbw) * 2);
@ -1151,7 +1174,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
printf("%d x %d\n\n", w, h);
for(size_t i = 0; i < countof(s_format); i++)
for (size_t i = 0; i < countof(s_format); i++)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[s_format[i].psm];
@ -1203,7 +1226,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
start = clock();
for(int j = 0; j < n; j++)
for (int j = 0; j < n; j++)
{
int x = 0;
int y = 0;
@ -1217,7 +1240,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
start = clock();
for(int j = 0; j < n; j++)
for (int j = 0; j < n; j++)
{
int x = 0;
int y = 0;
@ -1233,7 +1256,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
start = clock();
for(int j = 0; j < n; j++)
for (int j = 0; j < n; j++)
{
(mem->*rtx)(off, r, ptr, w * 4, TEXA);
}
@ -1242,11 +1265,11 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
printf("%6d %6d ", (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000));
if(psm.pal > 0)
if (psm.pal > 0)
{
start = clock();
for(int j = 0; j < n; j++)
for (int j = 0; j < n; j++)
{
(mem->*rtxP)(off, r, ptr, w, TEXA);
}
@ -1269,13 +1292,14 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
//
if(0)
if (0)
{
GSLocalMemory* mem = new GSLocalMemory();
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
for (int i = 0; i < 1024 * 1024 * 4; i++)
ptr[i] = (uint8)i;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32];
@ -1320,7 +1344,7 @@ inline unsigned long timeGetTime()
{
struct timespec t;
clock_gettime(CLOCK_REALTIME, &t);
return (unsigned long)(t.tv_sec*1000 + t.tv_nsec/1000000);
return (unsigned long)(t.tv_sec * 1000 + t.tv_nsec / 1000000);
}
// Note
@ -1344,7 +1368,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
return;
}
struct Packet {uint8 type, param; uint32 size, addr; std::vector<uint8> buff;};
struct Packet
{
uint8 type, param;
uint32 size, addr;
std::vector<uint8> buff;
};
std::list<Packet*> packets;
std::vector<uint8> buff;
@ -1356,7 +1385,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
int finished = theApp.GetConfigI("linux_replay");
bool repack_dump = (finished < 0);
if (theApp.GetConfigI("dump")) {
if (theApp.GetConfigI("dump"))
{
fprintf(stderr, "Dump is enabled. Replay will be disabled\n");
finished = 1;
}
@ -1365,19 +1395,21 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
void* hWnd = NULL;
int err = _GSopen((void**)&hWnd, "", m_renderer);
if (err != 0) {
if (err != 0)
{
fprintf(stderr, "Error failed to GSopen\n");
return;
}
if (s_gs->m_wnd == NULL) return;
if (s_gs->m_wnd == NULL)
return;
{ // Read .gs content
std::string f(lpszCmdLine);
bool is_xz = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0);
bool is_xz = (f.size() >= 4) && (f.compare(f.size() - 3, 3, ".xz") == 0);
if (is_xz)
f.replace(f.end()-6, f.end(), "_repack.gs");
f.replace(f.end() - 6, f.end(), "_repack.gs");
else
f.replace(f.end()-3, f.end(), "_repack.gs");
f.replace(f.end() - 3, f.end(), "_repack.gs");
GSDumpFile* file = is_xz
? (GSDumpFile*) new GSDumpLzma(lpszCmdLine, repack_dump ? f.c_str() : nullptr)
@ -1393,57 +1425,57 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
file->Read(fd.data, fd.size);
GSfreeze(FREEZE_LOAD, &fd);
delete [] fd.data;
delete[] fd.data;
file->Read(regs, 0x2000);
uint8 type;
while(file->Read(&type, 1))
while (file->Read(&type, 1))
{
Packet* p = new Packet();
p->type = type;
switch(type)
switch (type)
{
case 0:
file->Read(&p->param, 1);
file->Read(&p->size, 4);
switch(p->param)
{
case 0:
p->buff.resize(0x4000);
p->addr = 0x4000 - p->size;
file->Read(&p->buff[p->addr], p->size);
file->Read(&p->param, 1);
file->Read(&p->size, 4);
switch (p->param)
{
case 0:
p->buff.resize(0x4000);
p->addr = 0x4000 - p->size;
file->Read(&p->buff[p->addr], p->size);
break;
case 1:
case 2:
case 3:
p->buff.resize(p->size);
file->Read(&p->buff[0], p->size);
break;
}
break;
case 1:
case 2:
case 3:
p->buff.resize(p->size);
file->Read(&p->buff[0], p->size);
file->Read(&p->param, 1);
frame_number++;
break;
}
break;
case 2:
file->Read(&p->size, 4);
case 1:
file->Read(&p->param, 1);
frame_number++;
break;
break;
case 3:
p->buff.resize(0x2000);
case 2:
file->Read(&p->size, 4);
file->Read(&p->buff[0], 0x2000);
break;
case 3:
p->buff.resize(0x2000);
file->Read(&p->buff[0], 0x2000);
break;
break;
}
packets.push_back(p);
@ -1463,17 +1495,17 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
// Init vsync stuff
GSvsync(1);
while(finished > 0)
while (finished > 0)
{
for(auto i = packets.begin(); i != packets.end(); i++)
for (auto i = packets.begin(); i != packets.end(); i++)
{
Packet* p = *i;
switch(p->type)
switch (p->type)
{
case 0:
switch(p->param)
switch (p->param)
{
case 0: GSgifTransfer1(&p->buff[0], p->addr); break;
case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break;
@ -1492,7 +1524,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
case 2:
if(buff.size() < p->size) buff.resize(p->size);
if (buff.size() < p->size)
buff.resize(p->size);
GSreadFIFO2(&buff[0], p->size / 16);
@ -1506,11 +1539,16 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
}
}
if (finished >= 200) {
if (finished >= 200)
{
; // Nop for Nvidia Profiler
} else if (finished > 90) {
}
else if (finished > 90)
{
sleep(1);
} else {
}
else
{
finished--;
}
}
@ -1520,13 +1558,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
#ifdef ENABLE_OGL_DEBUG_MEM_BW
unsigned long total_frame_nb = std::max(1l, frame_number) << 10;
fprintf(stderr, "memory bandwith. T: %f KB/f. V: %f KB/f. U: %f KB/f\n",
(float)g_real_texture_upload_byte/(float)total_frame_nb,
(float)g_vertex_upload_byte/(float)total_frame_nb,
(float)g_uniform_upload_byte/(float)total_frame_nb
);
(float)g_real_texture_upload_byte / (float)total_frame_nb,
(float)g_vertex_upload_byte / (float)total_frame_nb,
(float)g_uniform_upload_byte / (float)total_frame_nb);
#endif
for(auto i = packets.begin(); i != packets.end(); i++)
for (auto i = packets.begin(); i != packets.end(); i++)
{
delete *i;
}

File diff suppressed because it is too large Load Diff

View File

@ -21,28 +21,29 @@
#pragma once
template<int i> class GSAlignedClass
template <int i>
class GSAlignedClass
{
public:
GSAlignedClass() {}
virtual ~GSAlignedClass() {}
void* operator new (size_t size)
void* operator new(size_t size)
{
return _aligned_malloc(size, i);
}
void operator delete (void* p)
void operator delete(void* p)
{
_aligned_free(p);
}
void* operator new [] (size_t size)
void* operator new[](size_t size)
{
return _aligned_malloc(size, i);
}
void operator delete [] (void* p)
void operator delete[](void* p)
{
_aligned_free(p);
}

File diff suppressed because it is too large Load Diff

View File

@ -26,33 +26,45 @@
#ifdef _WIN32
class CPinInfo : public PIN_INFO {
class CPinInfo : public PIN_INFO
{
public:
CPinInfo() { pFilter = NULL; }
~CPinInfo() { if (pFilter) pFilter->Release(); }
~CPinInfo()
{
if (pFilter)
pFilter->Release();
}
};
class CFilterInfo : public FILTER_INFO {
class CFilterInfo : public FILTER_INFO
{
public:
CFilterInfo() { pGraph = NULL; }
~CFilterInfo() { if (pGraph) pGraph->Release(); }
~CFilterInfo()
{
if (pGraph)
pGraph->Release();
}
};
#define BeginEnumFilters(pFilterGraph, pEnumFilters, pBaseFilter) \
{CComPtr<IEnumFilters> pEnumFilters; \
if(pFilterGraph && SUCCEEDED(pFilterGraph->EnumFilters(&pEnumFilters))) \
{ \
for(CComPtr<IBaseFilter> pBaseFilter; S_OK == pEnumFilters->Next(1, &pBaseFilter, 0); pBaseFilter = NULL) \
CComPtr<IEnumFilters> pEnumFilters; \
if(pFilterGraph && SUCCEEDED(pFilterGraph->EnumFilters(&pEnumFilters))) \
{ \
for(CComPtr<IBaseFilter> pBaseFilter; S_OK == pEnumFilters->Next(1, &pBaseFilter, 0); pBaseFilter = NULL) \
{
#define EndEnumFilters }}}
#define BeginEnumPins(pBaseFilter, pEnumPins, pPin) \
{CComPtr<IEnumPins> pEnumPins; \
if(pBaseFilter && SUCCEEDED(pBaseFilter->EnumPins(&pEnumPins))) \
{ \
for(CComPtr<IPin> pPin; S_OK == pEnumPins->Next(1, &pPin, 0); pPin = NULL) \
CComPtr<IEnumPins> pEnumPins; \
if(pBaseFilter && SUCCEEDED(pBaseFilter->EnumPins(&pEnumPins))) \
{ \
for(CComPtr<IPin> pPin; S_OK == pEnumPins->Next(1, &pPin, 0); pPin = NULL) \
{
#define EndEnumPins }}}
@ -76,9 +88,9 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
STDMETHODIMP NonDelegatingQueryInterface(REFIID riid, void** ppv)
{
return
riid == __uuidof(IGSSource) ? GetInterface((IGSSource*)this, ppv) :
__super::NonDelegatingQueryInterface(riid, ppv);
return riid == __uuidof(IGSSource)
? GetInterface((IGSSource*)this, ppv)
: __super::NonDelegatingQueryInterface(riid, ppv);
}
class GSSourceOutputPin : public CBaseOutputPin
@ -126,8 +138,10 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 4;
mt.SetFormat((uint8*)&vih, sizeof(vih));
if(colorspace == 1) m_mts.insert(m_mts.begin(), mt);
else m_mts.push_back(mt);
if (colorspace == 1)
m_mts.insert(m_mts.begin(), mt);
else
m_mts.push_back(mt);
}
HRESULT GSSourceOutputPin::DecideBufferSize(IMemAllocator* pAlloc, ALLOCATOR_PROPERTIES* pProperties)
@ -141,12 +155,12 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
ALLOCATOR_PROPERTIES Actual;
if(FAILED(hr = pAlloc->SetProperties(pProperties, &Actual)))
if (FAILED(hr = pAlloc->SetProperties(pProperties, &Actual)))
{
return hr;
}
if(Actual.cbBuffer < pProperties->cbBuffer)
if (Actual.cbBuffer < pProperties->cbBuffer)
{
return E_FAIL;
}
@ -156,11 +170,11 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
return S_OK;
}
HRESULT CheckMediaType(const CMediaType* pmt)
HRESULT CheckMediaType(const CMediaType* pmt)
{
for(const auto &mt : m_mts)
for (const auto& mt : m_mts)
{
if(mt.majortype == pmt->majortype && mt.subtype == pmt->subtype)
if (mt.majortype == pmt->majortype && mt.subtype == pmt->subtype)
{
return S_OK;
}
@ -169,12 +183,14 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
return E_FAIL;
}
HRESULT GetMediaType(int i, CMediaType* pmt)
HRESULT GetMediaType(int i, CMediaType* pmt)
{
CheckPointer(pmt, E_POINTER);
if(i < 0) return E_INVALIDARG;
if(i > 1) return VFW_S_NO_MORE_ITEMS;
if (i < 0)
return E_INVALIDARG;
if (i > 1)
return VFW_S_NO_MORE_ITEMS;
*pmt = m_mts[i];
@ -195,7 +211,6 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
GSSourceOutputPin* m_output;
public:
GSSource(int w, int h, float fps, IUnknown* pUnk, HRESULT& hr, int colorspace)
: CBaseFilter("GSSource", pUnk, this, __uuidof(this), &hr)
, m_output(NULL)
@ -234,14 +249,14 @@ public:
STDMETHODIMP DeliverFrame(const void* bits, int pitch, bool rgba)
{
if(!m_output || !m_output->IsConnected())
if (!m_output || !m_output->IsConnected())
{
return E_UNEXPECTED;
}
CComPtr<IMediaSample> sample;
if(FAILED(m_output->GetDeliveryBuffer(&sample, NULL, NULL, 0)))
if (FAILED(m_output->GetDeliveryBuffer(&sample, NULL, NULL, 0)))
{
return E_FAIL;
}
@ -263,7 +278,7 @@ public:
int h = m_size.y;
int srcpitch = pitch;
if(mt.subtype == MEDIASUBTYPE_YUY2)
if (mt.subtype == MEDIASUBTYPE_YUY2)
{
int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2;
@ -271,7 +286,7 @@ public:
GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
if(!rgba)
if (!rgba)
{
ys = ys.zyxw();
us = us.zyxw();
@ -280,12 +295,12 @@ public:
const GSVector4 offset(16, 128, 16, 128);
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
for (int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
{
uint32* s = (uint32*)src;
uint16* d = (uint16*)dst;
for(int i = 0; i < w; i += 2)
for (int i = 0; i < w; i += 2)
{
GSVector4 c0 = GSVector4::rgba32(s[i + 0]);
GSVector4 c1 = GSVector4::rgba32(s[i + 1]);
@ -300,40 +315,40 @@ public:
}
}
}
else if(mt.subtype == MEDIASUBTYPE_RGB32)
else if (mt.subtype == MEDIASUBTYPE_RGB32)
{
int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 4;
dst += dstpitch * (h - 1);
dstpitch = -dstpitch;
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
for (int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
{
if(rgba)
if (rgba)
{
#if _M_SSE >= 0x301
#if _M_SSE >= 0x301
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
GSVector4i mask(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
for(int i = 0, w4 = w >> 2; i < w4; i++)
for (int i = 0, w4 = w >> 2; i < w4; i++)
{
d[i] = s[i].shuffle8(mask);
}
#else
#else
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
for(int i = 0, w4 = w >> 2; i < w4; i++)
for (int i = 0, w4 = w >> 2; i < w4; i++)
{
d[i] = ((s[i] & 0x00ff0000) >> 16) | ((s[i] & 0x000000ff) << 16) | (s[i] & 0x0000ff00);
}
#endif
#endif
}
else
{
@ -346,7 +361,7 @@ public:
return E_FAIL;
}
if(FAILED(m_output->Deliver(sample)))
if (FAILED(m_output->Deliver(sample)))
{
return E_FAIL;
}
@ -364,22 +379,23 @@ public:
static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir)
{
if(!pBF) return(NULL);
if (!pBF)
return nullptr;
BeginEnumPins(pBF, pEP, pPin)
{
PIN_DIRECTION dir2;
pPin->QueryDirection(&dir2);
if(dir == dir2)
if (dir == dir2)
{
IPin* pRet = pPin.Detach();
pRet->Release();
return(pRet);
return pRet;
}
}
EndEnumPins
return(NULL);
return nullptr;
}
#endif
@ -390,7 +406,7 @@ static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir)
GSCapture::GSCapture()
: m_capturing(false), m_frame(0)
, m_out_dir("/tmp/GSdx_Capture") // FIXME Later add an option
, m_out_dir("/tmp/GSdx_Capture") // FIXME Later add an option
{
m_out_dir = theApp.GetConfigS("capture_out_dir");
m_threads = theApp.GetConfigI("capture_threads");
@ -451,17 +467,17 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float
CComPtr<ICaptureGraphBuilder2> cgb;
CComPtr<IBaseFilter> mux;
if(FAILED(hr = m_graph.CoCreateInstance(CLSID_FilterGraph))
|| FAILED(hr = cgb.CoCreateInstance(CLSID_CaptureGraphBuilder2))
|| FAILED(hr = cgb->SetFiltergraph(m_graph))
|| FAILED(hr = cgb->SetOutputFileName(&MEDIASUBTYPE_Avi, std::wstring(dlg.m_filename.begin(), dlg.m_filename.end()).c_str(), &mux, NULL)))
if (FAILED(hr = m_graph.CoCreateInstance(CLSID_FilterGraph))
|| FAILED(hr = cgb.CoCreateInstance(CLSID_CaptureGraphBuilder2))
|| FAILED(hr = cgb->SetFiltergraph(m_graph))
|| FAILED(hr = cgb->SetOutputFileName(&MEDIASUBTYPE_Avi, std::wstring(dlg.m_filename.begin(), dlg.m_filename.end()).c_str(), &mux, NULL)))
{
return false;
}
m_src = new GSSource(m_size.x, m_size.y, fps, NULL, hr, dlg.m_colorspace);
if (dlg.m_enc==0)
if (dlg.m_enc == 0)
{
if (FAILED(hr = m_graph->AddFilter(m_src, L"Source")))
return false;
@ -470,14 +486,13 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float
}
else
{
if(FAILED(hr = m_graph->AddFilter(m_src, L"Source"))
|| FAILED(hr = m_graph->AddFilter(dlg.m_enc, L"Encoder")))
if (FAILED(hr = m_graph->AddFilter(m_src, L"Source")) || FAILED(hr = m_graph->AddFilter(dlg.m_enc, L"Encoder")))
{
return false;
}
if(FAILED(hr = m_graph->ConnectDirect(GetFirstPin(m_src, PINDIR_OUTPUT), GetFirstPin(dlg.m_enc, PINDIR_INPUT), NULL))
|| FAILED(hr = m_graph->ConnectDirect(GetFirstPin(dlg.m_enc, PINDIR_OUTPUT), GetFirstPin(mux, PINDIR_INPUT), NULL)))
if (FAILED(hr = m_graph->ConnectDirect(GetFirstPin(m_src, PINDIR_OUTPUT), GetFirstPin(dlg.m_enc, PINDIR_INPUT), NULL))
|| FAILED(hr = m_graph->ConnectDirect(GetFirstPin(dlg.m_enc, PINDIR_OUTPUT), GetFirstPin(mux, PINDIR_INPUT), NULL)))
{
return false;
}
@ -519,7 +534,8 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float
m_size.x = theApp.GetConfigI("CaptureWidth");
m_size.y = theApp.GetConfigI("CaptureHeight");
for(int i = 0; i < m_threads; i++) {
for (int i = 0; i < m_threads; i++)
{
m_workers.push_back(std::unique_ptr<GSPng::Worker>(new GSPng::Worker(&GSPng::Process)));
}
@ -533,7 +549,7 @@ bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba)
{
std::lock_guard<std::recursive_mutex> lock(m_lock);
if(bits == NULL || pitch == 0)
if (bits == NULL || pitch == 0)
{
ASSERT(0);
@ -542,7 +558,7 @@ bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba)
#ifdef _WIN32
if(m_src)
if (m_src)
{
CComQIPtr<IGSSource>(m_src)->DeliverFrame(bits, pitch, rgba);
@ -553,7 +569,7 @@ bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba)
std::string out_file = m_out_dir + format("/frame.%010d.png", m_frame);
//GSPng::Save(GSPng::RGB_PNG, out_file, (uint8*)bits, m_size.x, m_size.y, pitch, m_compression_level);
m_workers[m_frame%m_threads]->Push(std::make_shared<GSPng::Transaction>(GSPng::RGB_PNG, out_file, static_cast<const uint8*>(bits), m_size.x, m_size.y, pitch, m_compression_level));
m_workers[m_frame % m_threads]->Push(std::make_shared<GSPng::Transaction>(GSPng::RGB_PNG, out_file, static_cast<const uint8*>(bits), m_size.x, m_size.y, pitch, m_compression_level));
m_frame++;
@ -571,14 +587,14 @@ bool GSCapture::EndCapture()
#ifdef _WIN32
if(m_src)
if (m_src)
{
CComQIPtr<IGSSource>(m_src)->DeliverEOS();
m_src = NULL;
}
if(m_graph)
if (m_graph)
{
CComQIPtr<IMediaControl>(m_graph)->Stop();

View File

@ -37,17 +37,17 @@ class GSCapture
std::string m_out_dir;
int m_threads;
#ifdef _WIN32
#ifdef _WIN32
CComPtr<IGraphBuilder> m_graph;
CComPtr<IBaseFilter> m_src;
#elif defined(__unix__)
#elif defined(__unix__)
std::vector<std::unique_ptr<GSPng::Worker>> m_workers;
int m_compression_level;
#endif
#endif
public:
GSCapture();
@ -57,6 +57,6 @@ public:
bool DeliverFrame(const void* bits, int pitch, bool rgba);
bool EndCapture();
bool IsCapturing() {return m_capturing;}
GSVector2i GetSize() {return m_size;}
bool IsCapturing() { return m_capturing; }
GSVector2i GetSize() { return m_size; }
};

View File

@ -71,9 +71,12 @@ class alignas(32) GSClut : public GSAlignedClass<32>
void WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template<int n> void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template<int n> void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template<int n> void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template <int n>
void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template <int n>
void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template <int n>
void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
@ -108,8 +111,8 @@ public:
void Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void GetAlphaMinMax32(int& amin, int& amax);
uint32 operator [] (size_t i) const {return m_buff32[i];}
uint32 operator[](size_t i) const { return m_buff32[i]; }
operator const uint32*() const {return m_buff32;}
operator const uint64*() const {return m_buff64;}
operator const uint32*() const { return m_buff32; }
operator const uint64*() const { return m_buff64; }
};

View File

@ -32,7 +32,7 @@ GSCodeBuffer::GSCodeBuffer(size_t blocksize)
GSCodeBuffer::~GSCodeBuffer()
{
for(auto buffer : m_buffers)
for (auto buffer : m_buffers)
{
vmfree(buffer, m_blocksize);
}
@ -45,7 +45,7 @@ void* GSCodeBuffer::GetBuffer(size_t size)
size = (size + 15) & ~15;
if(m_ptr == NULL || m_pos + size > m_blocksize)
if (m_ptr == NULL || m_pos + size > m_blocksize)
{
m_ptr = (uint8*)vmalloc(m_blocksize, true);

View File

@ -357,7 +357,7 @@ CRC::Game CRC::m_games[] =
{0xEF06DBD6, SakuraWarsSoLongMyLove, JP, 0}, // cutie comment
{0xDD41054D, SakuraWarsSoLongMyLove, US, 0}, // cutie comment
{0xC2E3A7A4, SakuraWarsSoLongMyLove, KO, 0},
{0x4A4B623A, FightingBeautyWulong, JP,0}, // cutie comment
{0x4A4B623A, FightingBeautyWulong, JP, 0}, // cutie comment
{0x5AC7E79C, TouristTrophy, CH, 0}, // cutie comment
{0xFF9C0E93, TouristTrophy, US, 0},
{0xCA9AA903, TouristTrophy, EU, 0},
@ -441,17 +441,17 @@ CRC::Game CRC::m_games[] =
{0x972611BB, FIFA05, US, 0},
{0x972719A3, FIFA05, EU, 0},
{0xC5473413, HarryPotterATCOS, NoRegion, 0}, // EU and US versions have the same CRC - Chamber Of Secrets
{0xE1963055, HarryPotterATCOS, JP, 0 }, // Harry Potter to Himitsu no Heya
{0xE90BE9F8, HarryPotterATCOS, JP, 0 }, // Coca Cola original Version
{0xB38CC628, HarryPotterATGOF, US, 0 },
{0xCDE017A7, HarryPotterATGOF, KO, 0 },
{0xB18DC525, HarryPotterATGOF, EU, 0 },
{0x9C3A84F4, HarryPotterATHBP, US, 0 }, // Half-Blood Prince
{0xCB598BC2, HarryPotterATHBP, EU, 0 },
{0x51E019BC, HarryPotterATPOA, NoRegion, 0 }, // EU and US versions have the same CRC - Prisoner of Azkaban
{0x99A8B4FF, HarryPotterATPOA, KO, 0 },
{0xA8901AD6, HarryPotterATPOA, JP, 0 }, // Harry Potter to Azkaban no Shuujin
{0x51E417AA, HarryPotterATPOA, EU, 0 },
{0xE1963055, HarryPotterATCOS, JP, 0}, // Harry Potter to Himitsu no Heya
{0xE90BE9F8, HarryPotterATCOS, JP, 0}, // Coca Cola original Version
{0xB38CC628, HarryPotterATGOF, US, 0},
{0xCDE017A7, HarryPotterATGOF, KO, 0},
{0xB18DC525, HarryPotterATGOF, EU, 0},
{0x9C3A84F4, HarryPotterATHBP, US, 0}, // Half-Blood Prince
{0xCB598BC2, HarryPotterATHBP, EU, 0},
{0x51E019BC, HarryPotterATPOA, NoRegion, 0}, // EU and US versions have the same CRC - Prisoner of Azkaban
{0x99A8B4FF, HarryPotterATPOA, KO, 0},
{0xA8901AD6, HarryPotterATPOA, JP, 0}, // Harry Potter to Azkaban no Shuujin
{0x51E417AA, HarryPotterATPOA, EU, 0},
{0x4C01B1B0, HarryPotterOOTP, US, 0}, // Order Of The Phoenix
{0x01A9BF0E, HarryPotterOOTP, EU, 0},
{0x230CB71D, SoulReaver2, US, 0},
@ -516,9 +516,9 @@ CRC::Game CRC::m_games[] =
std::map<uint32, CRC::Game*> CRC::m_map;
std::string ToLower( std::string str )
std::string ToLower(std::string str)
{
transform( str.begin(), str.end(), str.begin(), ::tolower);
transform(str.begin(), str.end(), str.begin(), ::tolower);
return str;
}
@ -536,18 +536,19 @@ bool IsCrcExcluded(std::string exclusionList, uint32 crc)
CRC::Game CRC::Lookup(uint32 crc)
{
printf("GSdx Lookup CRC:%08X\n", crc);
if(m_map.empty())
if (m_map.empty())
{
std::string exclusions = theApp.GetConfigS("CrcHacksExclusions");
if (exclusions.length() != 0)
printf( "GSdx: CrcHacksExclusions: %s\n", exclusions.c_str() );
printf("GSdx: CrcHacksExclusions: %s\n", exclusions.c_str());
int crcDups = 0;
for(size_t i = 0; i < countof(m_games); i++)
for (size_t i = 0; i < countof(m_games); i++)
{
if( !IsCrcExcluded( exclusions, m_games[i].crc ) ){
if(m_map[m_games[i].crc]){
printf("[FIXME] GSdx: Duplicate CRC: 0x%08X: (game-id/region-id) %d/%d overrides %d/%d\n"
, m_games[i].crc, m_games[i].title, m_games[i].region, m_map[m_games[i].crc]->title, m_map[m_games[i].crc]->region);
if (!IsCrcExcluded(exclusions, m_games[i].crc))
{
if (m_map[m_games[i].crc])
{
printf("[FIXME] GSdx: Duplicate CRC: 0x%08X: (game-id/region-id) %d/%d overrides %d/%d\n", m_games[i].crc, m_games[i].title, m_games[i].region, m_map[m_games[i].crc]->title, m_map[m_games[i].crc]->region);
crcDups++;
}
@ -556,13 +557,13 @@ CRC::Game CRC::Lookup(uint32 crc)
//else
// printf( "GSdx: excluding CRC hack for 0x%08x\n", m_games[i].crc );
}
if(crcDups)
if (crcDups)
printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups);
}
auto i = m_map.find(crc);
if(i != m_map.end())
if (i != m_map.end())
{
return *i->second;
}

View File

@ -29,24 +29,31 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv)
int uv = br;
if(wm == CLAMP_CLAMP)
if (wm == CLAMP_CLAMP)
{
if(uv > limit) uv = limit;
if (uv > limit)
uv = limit;
}
else if(wm == CLAMP_REPEAT)
else if (wm == CLAMP_REPEAT)
{
if(tl < 0) uv = limit; // wrap around
else if(uv > limit) uv = limit;
if (tl < 0)
uv = limit; // wrap around
else if (uv > limit)
uv = limit;
}
else if(wm == CLAMP_REGION_CLAMP)
else if (wm == CLAMP_REGION_CLAMP)
{
if(uv < minuv) uv = minuv;
if(uv > maxuv) uv = maxuv;
if (uv < minuv)
uv = minuv;
if (uv > maxuv)
uv = maxuv;
}
else if(wm == CLAMP_REGION_REPEAT)
else if (wm == CLAMP_REGION_REPEAT)
{
if(tl < 0) uv = minuv | maxuv; // wrap around, just use (any & mask) | fix
else uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask)
if (tl < 0)
uv = minuv | maxuv; // wrap around, just use (any & mask) | fix
else
uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask)
}
return uv;
@ -54,7 +61,7 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv)
static int reduce(int uv, int size)
{
while(size > 3 && (1 << (size - 1)) >= uv + 1)
while (size > 3 && (1 << (size - 1)) >= uv + 1)
{
size--;
}
@ -64,7 +71,7 @@ static int reduce(int uv, int size)
static int extend(int uv, int size)
{
while(size < 10 && (1 << size) < uv + 1)
while (size < 10 && (1 << size) < uv + 1)
{
size++;
}
@ -74,7 +81,8 @@ static int extend(int uv, int size)
GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap)
{
if(mipmap) return TEX0; // no mipmaping allowed
if (mipmap)
return TEX0; // no mipmaping allowed
// find the optimal value for TW/TH by analyzing vertex trace and clamping values, extending only for region modes where uv may be outside
@ -91,7 +99,7 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
GSVector4 uvf = st;
if(linear)
if (linear)
{
uvf += GSVector4(-0.5f, 0.5f).xxyy();
}
@ -101,23 +109,23 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
uv.x = findmax(uv.x, uv.z, (1 << tw) - 1, wms, minu, maxu);
uv.y = findmax(uv.y, uv.w, (1 << th) - 1, wmt, minv, maxv);
if(tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the textue cache and the saved memory is less
if (tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the textue cache and the saved memory is less
{
tw = reduce(uv.x, tw);
th = reduce(uv.y, th);
}
if(wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT)
if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT)
{
tw = extend(uv.x, tw);
}
if(wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT)
if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT)
{
th = extend(uv.y, th);
}
if((theApp.GetCurrentRendererType() == GSRendererType::OGL_SW) && ((int)TEX0.TW != tw || (int)TEX0.TH != th))
if ((theApp.GetCurrentRendererType() == GSRendererType::OGL_SW) && ((int)TEX0.TW != tw || (int)TEX0.TH != th))
{
GL_DBG("FixedTEX0 %05x %d %d tw %d=>%d th %d=>%d st (%.0f,%.0f,%.0f,%.0f) uvmax %d,%d wm %d,%d (%d,%d,%d,%d)",
(int)TEX0.TBP0, (int)TEX0.TBW, (int)TEX0.PSM,

View File

@ -31,19 +31,19 @@
class alignas(32) GSDrawingContext
{
public:
GIFRegXYOFFSET XYOFFSET;
GIFRegTEX0 TEX0;
GIFRegTEX1 TEX1;
GIFRegTEX2 TEX2;
GIFRegCLAMP CLAMP;
GIFRegMIPTBP1 MIPTBP1;
GIFRegMIPTBP2 MIPTBP2;
GIFRegSCISSOR SCISSOR;
GIFRegALPHA ALPHA;
GIFRegTEST TEST;
GIFRegFBA FBA;
GIFRegFRAME FRAME;
GIFRegZBUF ZBUF;
GIFRegXYOFFSET XYOFFSET;
GIFRegTEX0 TEX0;
GIFRegTEX1 TEX1;
GIFRegTEX2 TEX2;
GIFRegCLAMP CLAMP;
GIFRegMIPTBP1 MIPTBP1;
GIFRegMIPTBP2 MIPTBP2;
GIFRegSCISSOR SCISSOR;
GIFRegALPHA ALPHA;
GIFRegTEST TEST;
GIFRegFBA FBA;
GIFRegFRAME FRAME;
GIFRegZBUF ZBUF;
struct
{
@ -64,19 +64,19 @@ public:
struct
{
GIFRegXYOFFSET XYOFFSET;
GIFRegTEX0 TEX0;
GIFRegTEX1 TEX1;
GIFRegTEX2 TEX2;
GIFRegCLAMP CLAMP;
GIFRegMIPTBP1 MIPTBP1;
GIFRegMIPTBP2 MIPTBP2;
GIFRegSCISSOR SCISSOR;
GIFRegALPHA ALPHA;
GIFRegTEST TEST;
GIFRegFBA FBA;
GIFRegFRAME FRAME;
GIFRegZBUF ZBUF;
GIFRegXYOFFSET XYOFFSET;
GIFRegTEX0 TEX0;
GIFRegTEX1 TEX1;
GIFRegTEX2 TEX2;
GIFRegCLAMP CLAMP;
GIFRegMIPTBP1 MIPTBP1;
GIFRegMIPTBP2 MIPTBP2;
GIFRegSCISSOR SCISSOR;
GIFRegALPHA ALPHA;
GIFRegTEST TEST;
GIFRegFBA FBA;
GIFRegFRAME FRAME;
GIFRegZBUF ZBUF;
} stack;
bool m_fixed_tex0;
@ -142,7 +142,7 @@ public:
bool DepthWrite() const
{
if(TEST.ATE && TEST.ATST == ATST_NEVER && TEST.AFAIL != AFAIL_ZB_ONLY) // alpha test, all pixels fail, z buffer is not updated
if (TEST.ATE && TEST.ATST == ATST_NEVER && TEST.AFAIL != AFAIL_ZB_ONLY) // alpha test, all pixels fail, z buffer is not updated
{
return false;
}
@ -152,7 +152,7 @@ public:
GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false);
void ComputeFixedTEX0(const GSVector4& st);
bool HasFixedTEX0() const { return m_fixed_tex0;}
bool HasFixedTEX0() const { return m_fixed_tex0; }
// Save & Restore before/after draw allow to correct/optimize current register for current draw
// Note: we could avoid the restore part if all renderer code is updated to use a local copy instead
@ -197,120 +197,121 @@ public:
{
// Append on purpose so env + context are merged into a single file
FILE* fp = fopen(filename.c_str(), "at");
if (!fp) return;
if (!fp)
return;
fprintf(fp, "XYOFFSET\n"
"\tX:%u\n"
"\tY:%u\n\n"
, XYOFFSET.OFX, XYOFFSET.OFY);
"\tX:%u\n"
"\tY:%u\n\n"
, XYOFFSET.OFX, XYOFFSET.OFY);
fprintf(fp, "MIPTBP1\n"
"\tBP1:0x%llx\n"
"\tBW1:%llu\n"
"\tBP2:0x%llx\n"
"\tBW2:%llu\n"
"\tBP3:0x%llx\n"
"\tBW3:%llu\n\n"
, MIPTBP1.TBP1, MIPTBP1.TBW1, MIPTBP1.TBP2, MIPTBP1.TBW2, MIPTBP1.TBP3, MIPTBP1.TBW3);
"\tBP1:0x%llx\n"
"\tBW1:%llu\n"
"\tBP2:0x%llx\n"
"\tBW2:%llu\n"
"\tBP3:0x%llx\n"
"\tBW3:%llu\n\n"
, MIPTBP1.TBP1, MIPTBP1.TBW1, MIPTBP1.TBP2, MIPTBP1.TBW2, MIPTBP1.TBP3, MIPTBP1.TBW3);
fprintf(fp, "MIPTBP2\n"
"\tBP4:0x%llx\n"
"\tBW4:%llu\n"
"\tBP5:0x%llx\n"
"\tBW5:%llu\n"
"\tBP6:0x%llx\n"
"\tBW6:%llu\n\n"
, MIPTBP2.TBP4, MIPTBP2.TBW4, MIPTBP2.TBP5, MIPTBP2.TBW5, MIPTBP2.TBP6, MIPTBP2.TBW6);
"\tBP4:0x%llx\n"
"\tBW4:%llu\n"
"\tBP5:0x%llx\n"
"\tBW5:%llu\n"
"\tBP6:0x%llx\n"
"\tBW6:%llu\n\n"
, MIPTBP2.TBP4, MIPTBP2.TBW4, MIPTBP2.TBP5, MIPTBP2.TBW5, MIPTBP2.TBP6, MIPTBP2.TBW6);
fprintf(fp, "TEX0\n"
"\tTBP0:0x%x\n"
"\tTBW:%u\n"
"\tPSM:0x%x\n"
"\tTW:%u\n"
"\tTCC:%u\n"
"\tTFX:%u\n"
"\tCBP:0x%x\n"
"\tCPSM:0x%x\n"
"\tCSM:%u\n"
"\tCSA:%u\n"
"\tCLD:%u\n"
"\tTH:%llu\n\n"
, TEX0.TBP0, TEX0.TBW, TEX0.PSM, TEX0.TW, TEX0.TCC, TEX0.TFX, TEX0.CBP, TEX0.CPSM, TEX0.CSM, TEX0.CSA, TEX0.CLD, TEX0.TH);
"\tTBP0:0x%x\n"
"\tTBW:%u\n"
"\tPSM:0x%x\n"
"\tTW:%u\n"
"\tTCC:%u\n"
"\tTFX:%u\n"
"\tCBP:0x%x\n"
"\tCPSM:0x%x\n"
"\tCSM:%u\n"
"\tCSA:%u\n"
"\tCLD:%u\n"
"\tTH:%llu\n\n"
, TEX0.TBP0, TEX0.TBW, TEX0.PSM, TEX0.TW, TEX0.TCC, TEX0.TFX, TEX0.CBP, TEX0.CPSM, TEX0.CSM, TEX0.CSA, TEX0.CLD, TEX0.TH);
fprintf(fp, "TEX1\n"
"\tLCM:%u\n"
"\tMXL:%u\n"
"\tMMAG:%u\n"
"\tMMIN:%u\n"
"\tMTBA:%u\n"
"\tL:%u\n"
"\tK:%d\n\n"
, TEX1.LCM, TEX1.MXL, TEX1.MMAG, TEX1.MMIN, TEX1.MTBA, TEX1.L, TEX1.K);
"\tLCM:%u\n"
"\tMXL:%u\n"
"\tMMAG:%u\n"
"\tMMIN:%u\n"
"\tMTBA:%u\n"
"\tL:%u\n"
"\tK:%d\n\n"
, TEX1.LCM, TEX1.MXL, TEX1.MMAG, TEX1.MMIN, TEX1.MTBA, TEX1.L, TEX1.K);
fprintf(fp, "TEX2\n"
"\tPSM:0x%x\n"
"\tCBP:0x%x\n"
"\tCPSM:0x%x\n"
"\tCSM:%u\n"
"\tCSA:%u\n"
"\tCLD:%u\n\n"
, TEX2.PSM, TEX2.CBP, TEX2.CPSM, TEX2.CSM, TEX2.CSA, TEX2.CLD);
"\tPSM:0x%x\n"
"\tCBP:0x%x\n"
"\tCPSM:0x%x\n"
"\tCSM:%u\n"
"\tCSA:%u\n"
"\tCLD:%u\n\n"
, TEX2.PSM, TEX2.CBP, TEX2.CPSM, TEX2.CSM, TEX2.CSA, TEX2.CLD);
fprintf(fp, "CLAMP\n"
"\tWMS:%u\n"
"\tWMT:%u\n"
"\tMINU:%u\n"
"\tMAXU:%u\n"
"\tMAXV:%u\n"
"\tMINV:%llu\n\n"
, CLAMP.WMS, CLAMP.WMT, CLAMP.MINU, CLAMP.MAXU, CLAMP.MAXV, CLAMP.MINV);
"\tWMS:%u\n"
"\tWMT:%u\n"
"\tMINU:%u\n"
"\tMAXU:%u\n"
"\tMAXV:%u\n"
"\tMINV:%llu\n\n"
, CLAMP.WMS, CLAMP.WMT, CLAMP.MINU, CLAMP.MAXU, CLAMP.MAXV, CLAMP.MINV);
// TODO mimmap? (yes I'm lazy)
fprintf(fp, "SCISSOR\n"
"\tX0:%u\n"
"\tX1:%u\n"
"\tY0:%u\n"
"\tY1:%u\n\n"
, SCISSOR.SCAX0, SCISSOR.SCAX1, SCISSOR.SCAY0, SCISSOR.SCAY1);
"\tX0:%u\n"
"\tX1:%u\n"
"\tY0:%u\n"
"\tY1:%u\n\n"
, SCISSOR.SCAX0, SCISSOR.SCAX1, SCISSOR.SCAY0, SCISSOR.SCAY1);
fprintf(fp, "ALPHA\n"
"\tA:%u\n"
"\tB:%u\n"
"\tC:%u\n"
"\tD:%u\n"
"\tFIX:%u\n"
, ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, ALPHA.FIX);
const char *col[3] = {"Cs", "Cd", "0"};
const char *alpha[3] = {"As", "Ad", "Af"};
"\tA:%u\n"
"\tB:%u\n"
"\tC:%u\n"
"\tD:%u\n"
"\tFIX:%u\n"
, ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, ALPHA.FIX);
const char* col[3] = {"Cs", "Cd", "0"};
const char* alpha[3] = {"As", "Ad", "Af"};
fprintf(fp, "\t=> (%s - %s) * %s + %s\n\n", col[ALPHA.A], col[ALPHA.B], alpha[ALPHA.C], col[ALPHA.D]);
fprintf(fp, "TEST\n"
"\tATE:%u\n"
"\tATST:%u\n"
"\tAREF:%u\n"
"\tAFAIL:%u\n"
"\tDATE:%u\n"
"\tDATM:%u\n"
"\tZTE:%u\n"
"\tZTST:%u\n\n"
, TEST.ATE, TEST.ATST, TEST.AREF, TEST.AFAIL, TEST.DATE, TEST.DATM, TEST.ZTE, TEST.ZTST);
"\tATE:%u\n"
"\tATST:%u\n"
"\tAREF:%u\n"
"\tAFAIL:%u\n"
"\tDATE:%u\n"
"\tDATM:%u\n"
"\tZTE:%u\n"
"\tZTST:%u\n\n"
, TEST.ATE, TEST.ATST, TEST.AREF, TEST.AFAIL, TEST.DATE, TEST.DATM, TEST.ZTE, TEST.ZTST);
fprintf(fp, "FBA\n"
"\tFBA:%u\n\n"
, FBA.FBA);
"\tFBA:%u\n\n"
, FBA.FBA);
fprintf(fp, "FRAME\n"
"\tFBP (*32):0x%x\n"
"\tFBW:%u\n"
"\tPSM:0x%x\n"
"\tFBMSK:0x%x\n\n"
, FRAME.FBP*32, FRAME.FBW, FRAME.PSM, FRAME.FBMSK);
"\tFBP (*32):0x%x\n"
"\tFBW:%u\n"
"\tPSM:0x%x\n"
"\tFBMSK:0x%x\n\n"
, FRAME.FBP * 32, FRAME.FBW, FRAME.PSM, FRAME.FBMSK);
fprintf(fp, "ZBUF\n"
"\tZBP (*32):0x%x\n"
"\tPSM:0x%x\n"
"\tZMSK:%u\n\n"
, ZBUF.ZBP*32, ZBUF.PSM, ZBUF.ZMSK);
"\tZBP (*32):0x%x\n"
"\tPSM:0x%x\n"
"\tZMSK:%u\n\n"
, ZBUF.ZBP * 32, ZBUF.PSM, ZBUF.ZMSK);
fclose(fp);
}

View File

@ -26,22 +26,22 @@
class alignas(32) GSDrawingEnvironment
{
public:
GIFRegPRIM PRIM;
GIFRegPRMODE PRMODE;
GIFRegPRMODECONT PRMODECONT;
GIFRegTEXCLUT TEXCLUT;
GIFRegSCANMSK SCANMSK;
GIFRegTEXA TEXA;
GIFRegFOGCOL FOGCOL;
GIFRegDIMX DIMX;
GIFRegDTHE DTHE;
GIFRegCOLCLAMP COLCLAMP;
GIFRegPABE PABE;
GIFRegBITBLTBUF BITBLTBUF;
GIFRegTRXDIR TRXDIR;
GIFRegTRXPOS TRXPOS;
GIFRegTRXREG TRXREG;
GSDrawingContext CTXT[2];
GIFRegPRIM PRIM;
GIFRegPRMODE PRMODE;
GIFRegPRMODECONT PRMODECONT;
GIFRegTEXCLUT TEXCLUT;
GIFRegSCANMSK SCANMSK;
GIFRegTEXA TEXA;
GIFRegFOGCOL FOGCOL;
GIFRegDIMX DIMX;
GIFRegDTHE DTHE;
GIFRegCOLCLAMP COLCLAMP;
GIFRegPABE PABE;
GIFRegBITBLTBUF BITBLTBUF;
GIFRegTRXDIR TRXDIR;
GIFRegTRXPOS TRXPOS;
GIFRegTRXREG TRXREG;
GSDrawingContext CTXT[2];
GSDrawingEnvironment()
{
@ -88,118 +88,118 @@ public:
void Dump(const std::string& filename)
{
FILE* fp = fopen(filename.c_str(), "wt");
if (!fp) return;
if (!fp)
return;
fprintf(fp, "PRIM\n"
"\tPRIM:%u\n"
"\tIIP:%u\n"
"\tTME:%u\n"
"\tFGE:%u\n"
"\tABE:%u\n"
"\tAA1:%u\n"
"\tFST:%u\n"
"\tCTXT:%u\n"
"\tFIX:%u\n\n"
, PRIM.PRIM, PRIM.IIP, PRIM.TME, PRIM.FGE, PRIM.ABE, PRIM.AA1, PRIM.FST, PRIM.CTXT, PRIM.FIX);
"\tPRIM:%u\n"
"\tIIP:%u\n"
"\tTME:%u\n"
"\tFGE:%u\n"
"\tABE:%u\n"
"\tAA1:%u\n"
"\tFST:%u\n"
"\tCTXT:%u\n"
"\tFIX:%u\n\n"
, PRIM.PRIM, PRIM.IIP, PRIM.TME, PRIM.FGE, PRIM.ABE, PRIM.AA1, PRIM.FST, PRIM.CTXT, PRIM.FIX);
fprintf(fp, "PRMODE (when AC=0)\n"
"\t_PRIM:%u\n"
"\tIIP:%u\n"
"\tTME:%u\n"
"\tFGE:%u\n"
"\tABE:%u\n"
"\tAA1:%u\n"
"\tFST:%u\n"
"\tCTXT:%u\n"
"\tFIX:%u\n\n"
, PRMODE._PRIM, PRMODE.IIP, PRMODE.TME, PRMODE.FGE, PRMODE.ABE, PRMODE.AA1, PRMODE.FST, PRMODE.CTXT, PRMODE.FIX);
"\t_PRIM:%u\n"
"\tIIP:%u\n"
"\tTME:%u\n"
"\tFGE:%u\n"
"\tABE:%u\n"
"\tAA1:%u\n"
"\tFST:%u\n"
"\tCTXT:%u\n"
"\tFIX:%u\n\n"
, PRMODE._PRIM, PRMODE.IIP, PRMODE.TME, PRMODE.FGE, PRMODE.ABE, PRMODE.AA1, PRMODE.FST, PRMODE.CTXT, PRMODE.FIX);
fprintf(fp, "PRMODECONT\n"
"\tAC:%u\n\n"
, PRMODECONT.AC);
"\tAC:%u\n\n"
, PRMODECONT.AC);
fprintf(fp, "TEXCLUT\n"
"\tCOU:%u\n"
"\tCBW:%u\n"
"\tCOV:%u\n\n"
, TEXCLUT.COU, TEXCLUT.CBW, TEXCLUT.COV);
"\tCOU:%u\n"
"\tCBW:%u\n"
"\tCOV:%u\n\n"
, TEXCLUT.COU, TEXCLUT.CBW, TEXCLUT.COV);
fprintf(fp, "SCANMSK\n"
"\tMSK:%u\n\n"
"\n"
, SCANMSK.MSK);
"\tMSK:%u\n\n"
"\n"
, SCANMSK.MSK);
fprintf(fp, "TEXA\n"
"\tAEM:%u\n"
"\tTA0:%u\n"
"\tTA1:%u\n\n"
, TEXA.AEM, TEXA.TA0, TEXA.TA1);
"\tAEM:%u\n"
"\tTA0:%u\n"
"\tTA1:%u\n\n"
, TEXA.AEM, TEXA.TA0, TEXA.TA1);
fprintf(fp, "FOGCOL\n"
"\tFCG:%u\n"
"\tFCB:%u\n"
"\tFCR:%u\n\n"
, FOGCOL.FCG, FOGCOL.FCB, FOGCOL.FCR);
"\tFCG:%u\n"
"\tFCB:%u\n"
"\tFCR:%u\n\n"
, FOGCOL.FCG, FOGCOL.FCB, FOGCOL.FCR);
fprintf(fp, "DIMX\n"
"\tDM22:%d\n"
"\tDM23:%d\n"
"\tDM31:%d\n"
"\tDM02:%d\n"
"\tDM21:%d\n"
"\tDM12:%d\n"
"\tDM03:%d\n"
"\tDM01:%d\n"
"\tDM33:%d\n"
"\tDM30:%d\n"
"\tDM11:%d\n"
"\tDM10:%d\n"
"\tDM20:%d\n"
"\tDM32:%d\n"
"\tDM00:%d\n"
"\tDM13:%d\n\n"
, DIMX.DM22, DIMX.DM23, DIMX.DM31, DIMX.DM02, DIMX.DM21, DIMX.DM12, DIMX.DM03, DIMX.DM01, DIMX.DM33, DIMX.DM30, DIMX.DM11, DIMX.DM10, DIMX.DM20, DIMX.DM32, DIMX.DM00, DIMX.DM13);
"\tDM22:%d\n"
"\tDM23:%d\n"
"\tDM31:%d\n"
"\tDM02:%d\n"
"\tDM21:%d\n"
"\tDM12:%d\n"
"\tDM03:%d\n"
"\tDM01:%d\n"
"\tDM33:%d\n"
"\tDM30:%d\n"
"\tDM11:%d\n"
"\tDM10:%d\n"
"\tDM20:%d\n"
"\tDM32:%d\n"
"\tDM00:%d\n"
"\tDM13:%d\n\n"
, DIMX.DM22, DIMX.DM23, DIMX.DM31, DIMX.DM02, DIMX.DM21, DIMX.DM12, DIMX.DM03, DIMX.DM01, DIMX.DM33, DIMX.DM30, DIMX.DM11, DIMX.DM10, DIMX.DM20, DIMX.DM32, DIMX.DM00, DIMX.DM13);
fprintf(fp, "DTHE\n"
"\tDTHE:%u\n\n"
, DTHE.DTHE);
"\tDTHE:%u\n\n"
, DTHE.DTHE);
fprintf(fp, "COLCLAMP\n"
"\tCLAMP:%u\n\n"
, COLCLAMP.CLAMP);
"\tCLAMP:%u\n\n"
, COLCLAMP.CLAMP);
fprintf(fp, "PABE\n"
"\tPABE:%u\n\n"
, PABE.PABE);
"\tPABE:%u\n\n"
, PABE.PABE);
fprintf(fp, "BITBLTBUF\n"
"\tSBW:%u\n"
"\tSBP:0x%x\n"
"\tSPSM:%u\n"
"\tDBW:%u\n"
"\tDPSM:%u\n"
"\tDBP:0x%x\n\n"
, BITBLTBUF.SBW, BITBLTBUF.SBP, BITBLTBUF.SPSM, BITBLTBUF.DBW, BITBLTBUF.DPSM, BITBLTBUF.DBP);
"\tSBW:%u\n"
"\tSBP:0x%x\n"
"\tSPSM:%u\n"
"\tDBW:%u\n"
"\tDPSM:%u\n"
"\tDBP:0x%x\n\n"
, BITBLTBUF.SBW, BITBLTBUF.SBP, BITBLTBUF.SPSM, BITBLTBUF.DBW, BITBLTBUF.DPSM, BITBLTBUF.DBP);
fprintf(fp, "TRXDIR\n"
"\tXDIR:%u\n\n"
, TRXDIR.XDIR);
"\tXDIR:%u\n\n",
TRXDIR.XDIR);
fprintf(fp, "TRXPOS\n"
"\tDIRY:%u\n"
"\tSSAY:%u\n"
"\tSSAX:%u\n"
"\tDIRX:%u\n"
"\tDSAX:%u\n"
"\tDSAY:%u\n\n"
, TRXPOS.DIRY, TRXPOS.SSAY, TRXPOS.SSAX, TRXPOS.DIRX, TRXPOS.DSAX, TRXPOS.DSAY);
"\tDIRY:%u\n"
"\tSSAY:%u\n"
"\tSSAX:%u\n"
"\tDIRX:%u\n"
"\tDSAX:%u\n"
"\tDSAY:%u\n\n"
, TRXPOS.DIRY, TRXPOS.SSAY, TRXPOS.SSAX, TRXPOS.DIRX, TRXPOS.DSAX, TRXPOS.DSAY);
fprintf(fp, "TRXREG\n"
"\tRRH:%u\n"
"\tRRW:%u\n\n"
, TRXREG.RRH, TRXREG.RRW);
"\tRRH:%u\n"
"\tRRW:%u\n\n"
, TRXREG.RRH, TRXREG.RRW);
fclose(fp);
}
};

View File

@ -33,7 +33,7 @@ GSDumpBase::GSDumpBase(const std::string& fn)
GSDumpBase::~GSDumpBase()
{
if(m_gs)
if (m_gs)
fclose(m_gs);
}
@ -83,7 +83,7 @@ bool GSDumpBase::VSync(int field, bool last, const GSPrivRegSet* regs)
return (++m_frames & 1) == 0 && last && (m_extra_frames < 0);
}
void GSDumpBase::Write(const void *data, size_t size)
void GSDumpBase::Write(const void* data, size_t size)
{
if (!m_gs || size == 0)
return;
@ -103,7 +103,7 @@ GSDump::GSDump(const std::string& fn, uint32 crc, const GSFreezeData& fd, const
AddHeader(crc, fd, regs);
}
void GSDump::AppendRawData(const void *data, size_t size)
void GSDump::AppendRawData(const void* data, size_t size)
{
Write(data, size);
}
@ -122,7 +122,8 @@ GSDumpXz::GSDumpXz(const std::string& fn, uint32 crc, const GSFreezeData& fd, co
{
m_strm = LZMA_STREAM_INIT;
lzma_ret ret = lzma_easy_encoder(&m_strm, 6 /*level*/, LZMA_CHECK_CRC64);
if (ret != LZMA_OK) {
if (ret != LZMA_OK)
{
fprintf(stderr, "GSDumpXz: Error initializing LZMA encoder ! (error code %u)\n", ret);
return;
}
@ -141,7 +142,7 @@ GSDumpXz::~GSDumpXz()
lzma_end(&m_strm);
}
void GSDumpXz::AppendRawData(const void *data, size_t size)
void GSDumpXz::AppendRawData(const void* data, size_t size)
{
size_t old_size = m_in_buff.size();
m_in_buff.resize(old_size + size);
@ -151,7 +152,7 @@ void GSDumpXz::AppendRawData(const void *data, size_t size)
// is enabled, it will freeze PCSX2. 1GB should be enough for long dump.
//
// Note: long dumps are currently not supported so this path won't be executed
if (m_in_buff.size() > 1024*1024*1024)
if (m_in_buff.size() > 1024 * 1024 * 1024)
Flush();
}
@ -175,15 +176,17 @@ void GSDumpXz::Flush()
void GSDumpXz::Compress(lzma_action action, lzma_ret expected_status)
{
std::vector<uint8> out_buff(1024*1024);
do {
std::vector<uint8> out_buff(1024 * 1024);
do
{
m_strm.next_out = out_buff.data();
m_strm.avail_out = out_buff.size();
lzma_ret ret = lzma_code(&m_strm, action);
if (ret != expected_status) {
fprintf (stderr, "GSDumpXz: Error %d\n", (int) ret);
if (ret != expected_status)
{
fprintf(stderr, "GSDumpXz: Error %d\n", (int)ret);
return;
}

View File

@ -52,9 +52,9 @@ class GSDumpBase
protected:
void AddHeader(uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs);
void Write(const void *data, size_t size);
void Write(const void* data, size_t size);
virtual void AppendRawData(const void *data, size_t size) = 0;
virtual void AppendRawData(const void* data, size_t size) = 0;
virtual void AppendRawData(uint8 c) = 0;
public:
@ -68,7 +68,7 @@ public:
class GSDump final : public GSDumpBase
{
void AppendRawData(const void *data, size_t size) final;
void AppendRawData(const void* data, size_t size) final;
void AppendRawData(uint8 c) final;
public:
@ -84,7 +84,7 @@ class GSDumpXz final : public GSDumpBase
void Flush();
void Compress(lzma_action action, lzma_ret expected_status);
void AppendRawData(const void *data, size_t size);
void AppendRawData(const void* data, size_t size);
void AppendRawData(uint8 c);
public:

File diff suppressed because it is too large Load Diff

View File

@ -42,17 +42,17 @@ public:
int* col[8]; // rowOffset*
};
union {uint32 hash; struct {uint32 bp:14, bw:6, psm:6;};};
union { uint32 hash; struct { uint32 bp:14, bw:6, psm:6; }; };
Block block;
Pixel pixel;
std::array<uint32*,256> pages_as_bit; // texture page coverage based on the texture size. Lazy allocated
std::array<uint32*, 256> pages_as_bit; // texture page coverage based on the texture size. Lazy allocated
GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset();
enum {EOP = 0xffffffff};
enum { EOP = 0xffffffff };
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
void* GetPagesAsBits(const GSVector4i& rect, void* pages);
@ -164,7 +164,10 @@ protected:
__forceinline static uint32 Expand16To32(uint16 c, const GIFRegTEXA& TEXA)
{
return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3);
return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24)
| ((c & 0x7c00) << 9)
| ((c & 0x03e0) << 6)
| ((c & 0x001f) << 3);
}
// TODO
@ -557,7 +560,8 @@ public:
__forceinline void WritePixel4(uint32 addr, uint32 c)
{
int shift = (addr & 1) << 2; addr >>= 1;
int shift = (addr & 1) << 2;
addr >>= 1;
m_vm8[addr] = (uint8)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift));
}
@ -620,7 +624,7 @@ public:
WritePixel8H(PixelAddress32(x, y, bp, bw), c);
}
__forceinline void WritePixel4HL(int x, int y, uint32 c, uint32 bp, uint32 bw)
__forceinline void WritePixel4HL(int x, int y, uint32 c, uint32 bp, uint32 bw)
{
WritePixel4HL(PixelAddress32(x, y, bp, bw), c);
}
@ -674,13 +678,13 @@ public:
{
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for(int x = r.left; x < r.right; x++)
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = s[x];
}
@ -691,13 +695,13 @@ public:
{
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for(int x = r.left; x < r.right; x++)
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = (d[col[x]] & 0xff000000) | (s[x] & 0x00ffffff);
}
@ -708,13 +712,13 @@ public:
{
src -= r.left * sizeof(uint16);
for(int y = r.top; y < r.bottom; y++, src += pitch)
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint16* RESTRICT s = (uint16*)src;
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for(int x = r.left; x < r.right; x++)
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = s[x];
}
@ -725,13 +729,13 @@ public:
{
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for(int x = r.left; x < r.right; x++)
for (int x = r.left; x < r.right; x++)
{
uint32 rb = s[x] & 0x00f800f8;
uint32 ga = s[x] & 0x8000f800;
@ -848,19 +852,19 @@ public:
//
template<int psm, int bsx, int bsy, int alignment>
template <int psm, int bsx, int bsy, int alignment>
void WriteImageColumn(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template<int psm, int bsx, int bsy, int alignment>
template <int psm, int bsx, int bsy, int alignment>
void WriteImageBlock(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template<int psm, int bsx, int bsy>
template <int psm, int bsx, int bsy>
void WriteImageLeftRight(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template<int psm, int bsx, int bsy, int trbpp>
template <int psm, int bsx, int bsy, int trbpp>
void WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template<int psm, int bsx, int bsy, int trbpp>
template <int psm, int bsx, int bsy, int trbpp>
void WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
void WriteImage24(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
@ -913,10 +917,10 @@ public:
//
template<typename T> void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
template <typename T>
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
//
void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h);
};

View File

@ -21,32 +21,36 @@
#include "stdafx.h"
#include "GSLzma.h"
GSDumpFile::GSDumpFile(char* filename, const char* repack_filename) {
GSDumpFile::GSDumpFile(char* filename, const char* repack_filename)
{
m_fp = fopen(filename, "rb");
if (m_fp == nullptr) {
if (m_fp == nullptr)
{
fprintf(stderr, "failed to open %s\n", filename);
throw "BAD"; // Just exit the program
}
m_repack_fp = nullptr;
if (repack_filename) {
if (repack_filename)
{
m_repack_fp = fopen(repack_filename, "wb");
if (m_repack_fp == nullptr)
fprintf(stderr, "failed to open %s for repack\n", repack_filename);
}
}
void GSDumpFile::Repack(void* ptr, size_t size) {
void GSDumpFile::Repack(void* ptr, size_t size)
{
if (m_repack_fp == nullptr)
return;
size_t ret = fwrite(ptr, 1, size, m_repack_fp);
if (ret != size)
fprintf(stderr, "Failed to repack\n");
}
GSDumpFile::~GSDumpFile() {
GSDumpFile::~GSDumpFile()
{
if (m_fp)
fclose(m_fp);
if (m_repack_fp)
@ -54,13 +58,16 @@ GSDumpFile::~GSDumpFile() {
}
/******************************************************************/
GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename) : GSDumpFile(filename, repack_filename) {
GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename)
: GSDumpFile(filename, repack_filename)
{
memset(&m_strm, 0, sizeof(lzma_stream));
lzma_ret ret = lzma_stream_decoder(&m_strm, UINT32_MAX, 0);
if (ret != LZMA_OK) {
if (ret != LZMA_OK)
{
fprintf(stderr, "Error initializing the decoder! (error code %u)\n", ret);
throw "BAD"; // Just exit the program
}
@ -78,18 +85,21 @@ GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename) : GSDumpFile
m_strm.next_out = m_area;
}
void GSDumpLzma::Decompress() {
void GSDumpLzma::Decompress()
{
lzma_action action = LZMA_RUN;
m_strm.next_out = m_area;
m_strm.avail_out = m_buff_size;
// Nothing left in the input buffer. Read data from the file
if (m_strm.avail_in == 0 && !feof(m_fp)) {
if (m_strm.avail_in == 0 && !feof(m_fp))
{
m_strm.next_in = m_inbuf;
m_strm.avail_in = fread(m_inbuf, 1, BUFSIZ, m_fp);
if (ferror(m_fp)) {
if (ferror(m_fp))
{
fprintf(stderr, "Read error: %s\n", strerror(errno));
throw "BAD"; // Just exit the program
}
@ -97,10 +107,12 @@ void GSDumpLzma::Decompress() {
lzma_ret ret = lzma_code(&m_strm, action);
if (ret != LZMA_OK) {
if (ret != LZMA_OK)
{
if (ret == LZMA_STREAM_END)
fprintf(stderr, "LZMA decoder finished without error\n\n");
else {
else
{
fprintf(stderr, "Decoder error: (error code %u)\n", ret);
throw "BAD"; // Just exit the program
}
@ -110,28 +122,33 @@ void GSDumpLzma::Decompress() {
m_avail = m_buff_size - m_strm.avail_out;
}
bool GSDumpLzma::IsEof() {
bool GSDumpLzma::IsEof()
{
return feof(m_fp) && m_avail == 0 && m_strm.avail_in == 0;
}
bool GSDumpLzma::Read(void* ptr, size_t size) {
bool GSDumpLzma::Read(void* ptr, size_t size)
{
size_t off = 0;
uint8_t* dst = (uint8_t*)ptr;
size_t full_size = size;
while (size && !IsEof()) {
if (m_avail == 0) {
while (size && !IsEof())
{
if (m_avail == 0)
{
Decompress();
}
size_t l = std::min(size, m_avail);
memcpy(dst + off, m_area+m_start, l);
memcpy(dst + off, m_area + m_start, l);
m_avail -= l;
size -= l;
m_start += l;
off += l;
}
if (size == 0) {
if (size == 0)
{
Repack(ptr, full_size);
return true;
}
@ -139,7 +156,8 @@ bool GSDumpLzma::Read(void* ptr, size_t size) {
return false;
}
GSDumpLzma::~GSDumpLzma() {
GSDumpLzma::~GSDumpLzma()
{
lzma_end(&m_strm);
if (m_inbuf)
@ -150,26 +168,32 @@ GSDumpLzma::~GSDumpLzma() {
/******************************************************************/
// Opens the dump through GSDumpFile and zero/null-initializes every
// buffer bookkeeping member.
GSDumpRaw::GSDumpRaw(char* filename, const char* repack_filename)
	: GSDumpFile(filename, repack_filename)
	, m_buff_size(0)
	, m_area(nullptr)
	, m_inbuf(nullptr)
	, m_avail(0)
	, m_start(0)
{
}
bool GSDumpRaw::IsEof() {
bool GSDumpRaw::IsEof()
{
return !!feof(m_fp);
}
bool GSDumpRaw::Read(void* ptr, size_t size) {
bool GSDumpRaw::Read(void* ptr, size_t size)
{
size_t ret = fread(ptr, 1, size, m_fp);
if (ret != size && ferror(m_fp)) {
if (ret != size && ferror(m_fp))
{
fprintf(stderr, "GSDumpRaw:: Read error (%zu/%zu)\n", ret, size);
throw "BAD"; // Just exit the program
}
if (ret == size) {
if (ret == size)
{
Repack(ptr, size);
return true;
}

View File

@ -20,15 +20,16 @@
#include <lzma.h>
class GSDumpFile {
FILE* m_repack_fp;
class GSDumpFile
{
FILE* m_repack_fp;
protected:
FILE* m_fp;
protected:
FILE* m_fp;
void Repack(void* ptr, size_t size);
public:
public:
virtual bool IsEof() = 0;
virtual bool Read(void* ptr, size_t size) = 0;
@ -36,21 +37,20 @@ class GSDumpFile {
virtual ~GSDumpFile();
};
class GSDumpLzma : public GSDumpFile {
class GSDumpLzma : public GSDumpFile
{
lzma_stream m_strm;
size_t m_buff_size;
uint8_t* m_area;
uint8_t* m_inbuf;
size_t m_buff_size;
uint8_t* m_area;
uint8_t* m_inbuf;
size_t m_avail;
size_t m_start;
size_t m_avail;
size_t m_start;
void Decompress();
public:
public:
GSDumpLzma(char* filename, const char* repack_filename);
virtual ~GSDumpLzma();
@ -58,17 +58,16 @@ class GSDumpLzma : public GSDumpFile {
bool Read(void* ptr, size_t size) final;
};
class GSDumpRaw : public GSDumpFile {
class GSDumpRaw : public GSDumpFile
{
size_t m_buff_size;
uint8_t* m_area;
uint8_t* m_inbuf;
size_t m_buff_size;
uint8_t* m_area;
uint8_t* m_inbuf;
size_t m_avail;
size_t m_start;
public:
size_t m_avail;
size_t m_start;
public:
GSDumpRaw(char* filename, const char* repack_filename);
virtual ~GSDumpRaw() = default;

View File

@ -36,19 +36,19 @@ GSPerfMon::GSPerfMon()
void GSPerfMon::Put(counter_t c, double val)
{
#ifndef DISABLE_PERF_MON
if(c == Frame)
if (c == Frame)
{
#if defined(__unix__) || defined(__APPLE__)
// clock on linux will return CLOCK_PROCESS_CPUTIME_ID.
// CLOCK_THREAD_CPUTIME_ID is much more useful to measure the fps
struct timespec ts;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
uint64 now = (uint64) ts.tv_sec * (uint64) 1e6 + (uint64) ts.tv_nsec / (uint64) 1e3;
uint64 now = (uint64)ts.tv_sec * (uint64)1e6 + (uint64)ts.tv_nsec / (uint64)1e3;
#else
clock_t now = clock();
#endif
if(m_lastframe != 0)
if (m_lastframe != 0)
{
m_counters[c] += (now - m_lastframe) * 1000 / CLOCKS_PER_SEC;
}
@ -67,9 +67,9 @@ void GSPerfMon::Put(counter_t c, double val)
void GSPerfMon::Update()
{
#ifndef DISABLE_PERF_MON
if(m_count > 0)
if (m_count > 0)
{
for(size_t i = 0; i < countof(m_counters); i++)
for (size_t i = 0; i < countof(m_counters); i++)
{
m_stats[i] = m_counters[i] / m_count;
}
@ -86,7 +86,7 @@ void GSPerfMon::Start(int timer)
#ifndef DISABLE_PERF_MON
m_start[timer] = __rdtsc();
if(m_begin[timer] == 0)
if (m_begin[timer] == 0)
{
m_begin[timer] = m_start[timer];
}
@ -96,7 +96,7 @@ void GSPerfMon::Start(int timer)
void GSPerfMon::Stop(int timer)
{
#ifndef DISABLE_PERF_MON
if(m_start[timer] > 0)
if (m_start[timer] > 0)
{
m_total[timer] += __rdtsc() - m_start[timer];
m_start[timer] = 0;
@ -108,7 +108,7 @@ int GSPerfMon::CPU(int timer, bool reset)
{
int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer]));
if(reset)
if (reset)
{
m_begin[timer] = 0;
m_start[timer] = 0;

View File

@ -35,7 +35,14 @@ public:
// Identifiers of the statistics accumulated through Put() and read back
// through Get(). Frame is special-cased by Put(): it accumulates the time
// elapsed since the previous frame instead of the supplied value.
enum counter_t
{
	Frame,
	Prim,
	Draw,
	Swizzle,
	Unswizzle,
	Fillrate,
	Quad,
	SyncPoint,
	CounterLast, // NOTE(review): presumably the number of counters; keep it last
};
@ -52,11 +59,11 @@ protected:
public:
GSPerfMon();
// Stores the current frame number.
void SetFrame(uint64 frame)
{
	m_frame = frame;
}
// Returns the frame number last passed to SetFrame().
uint64 GetFrame()
{
	return m_frame;
}

void Put(counter_t c, double val = 0);
// Returns the per-counter average most recently computed by Update().
double Get(counter_t c)
{
	return m_stats[c];
}
void Update();
void Start(int timer = Main);
@ -70,6 +77,10 @@ class GSPerfMonAutoTimer
int m_timer;
public:
// Scope guard: starts `timer` on `pm` when constructed...
GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main)
{
	m_pm = pm;
	m_timer = timer;
	m_pm->Start(m_timer);
}
// ...and stops the same timer when the guard goes out of scope.
~GSPerfMonAutoTimer()
{
	m_pm->Stop(m_timer);
}
};

View File

@ -23,129 +23,135 @@
#include <zlib.h>
#include <png.h>
// Per-format description consumed by SaveFile()/Save(); indexed by GSPng::Format.
struct
{
	int type;                 // libpng color type given to png_set_IHDR for the first image
	int bytes_per_pixel_in;   // bytes occupied by one pixel in the source image
	int bytes_per_pixel_out;  // bytes written per pixel into the first image
	int channel_bit_depth;    // bit depth given to png_set_IHDR
	const char* extension[2]; // file name suffix of the first image and of the optional second one (nullptr = none)
} static const pixel[GSPng::Format::COUNT] = {
	{PNG_COLOR_TYPE_RGBA, 4, 4, 8 , {"_full.png", nullptr}},              // RGBA_PNG
	{PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", nullptr}},                   // RGB_PNG
	{PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", "_alpha.png"}},              // RGB_A_PNG
	{PNG_COLOR_TYPE_GRAY, 4, 1, 8 , {"_alpha.png", nullptr}},             // ALPHA_PNG
	{PNG_COLOR_TYPE_GRAY, 1, 1, 8 , {"_R8I.png", nullptr}},               // R8I_PNG
	{PNG_COLOR_TYPE_GRAY, 2, 2, 16, {"_R16I.png", nullptr}},              // R16I_PNG
	{PNG_COLOR_TYPE_GRAY, 4, 2, 16, {"_R32I_lsb.png", "_R32I_msb.png"}}, // R32I_PNG
};
namespace GSPng {
namespace GSPng
{
bool SaveFile(const std::string& file, const Format fmt, const uint8* const image,
uint8* const row, const int width, const int height, const int pitch,
const int compression, const bool rb_swapped = false, const bool first_image = false)
{
const int channel_bit_depth = pixel[fmt].channel_bit_depth;
const int bytes_per_pixel_in = pixel[fmt].bytes_per_pixel_in;
bool SaveFile(const std::string& file, const Format fmt, const uint8* const image,
uint8* const row, const int width, const int height, const int pitch,
const int compression, const bool rb_swapped = false, const bool first_image = false)
{
const int channel_bit_depth = pixel[fmt].channel_bit_depth;
const int bytes_per_pixel_in = pixel[fmt].bytes_per_pixel_in;
const int type = first_image ? pixel[fmt].type : PNG_COLOR_TYPE_GRAY;
const int offset = first_image ? 0 : pixel[fmt].bytes_per_pixel_out;
const int bytes_per_pixel_out = first_image ? pixel[fmt].bytes_per_pixel_out : bytes_per_pixel_in - offset;
const int type = first_image ? pixel[fmt].type : PNG_COLOR_TYPE_GRAY;
const int offset = first_image ? 0 : pixel[fmt].bytes_per_pixel_out;
const int bytes_per_pixel_out = first_image ? pixel[fmt].bytes_per_pixel_out : bytes_per_pixel_in - offset;
FILE *fp = px_fopen(file, "wb");
if (fp == nullptr)
return false;
FILE* fp = px_fopen(file, "wb");
if (fp == nullptr)
return false;
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
png_infop info_ptr = nullptr;
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
png_infop info_ptr = nullptr;
bool success;
try {
if (png_ptr == nullptr)
throw GSDXRecoverableError();
bool success;
try
{
if (png_ptr == nullptr)
throw GSDXRecoverableError();
info_ptr = png_create_info_struct(png_ptr);
if (info_ptr == nullptr)
throw GSDXRecoverableError();
info_ptr = png_create_info_struct(png_ptr);
if (info_ptr == nullptr)
throw GSDXRecoverableError();
if (setjmp(png_jmpbuf(png_ptr)))
throw GSDXRecoverableError();
if (setjmp(png_jmpbuf(png_ptr)))
throw GSDXRecoverableError();
png_init_io(png_ptr, fp);
png_set_compression_level(png_ptr, compression);
png_set_IHDR(png_ptr, info_ptr, width, height, channel_bit_depth, type,
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
png_write_info(png_ptr, info_ptr);
png_init_io(png_ptr, fp);
png_set_compression_level(png_ptr, compression);
png_set_IHDR(png_ptr, info_ptr, width, height, channel_bit_depth, type,
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
png_write_info(png_ptr, info_ptr);
if (channel_bit_depth > 8)
png_set_swap(png_ptr);
if (rb_swapped && type != PNG_COLOR_TYPE_GRAY)
png_set_bgr(png_ptr);
if (channel_bit_depth > 8)
png_set_swap(png_ptr);
if (rb_swapped && type != PNG_COLOR_TYPE_GRAY)
png_set_bgr(png_ptr);
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x)
for (int i = 0; i < bytes_per_pixel_out; ++i)
row[bytes_per_pixel_out * x + i] = image[y * pitch + bytes_per_pixel_in * x + i + offset];
png_write_row(png_ptr, row);
}
png_write_end(png_ptr, nullptr);
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
for (int i = 0; i < bytes_per_pixel_out; ++i)
row[bytes_per_pixel_out * x + i] = image[y * pitch + bytes_per_pixel_in * x + i + offset];
png_write_row(png_ptr, row);
}
png_write_end(png_ptr, nullptr);
success = true;
} catch (GSDXRecoverableError&) {
fprintf(stderr, "Failed to write image %s\n", file.c_str());
success = true;
}
catch (GSDXRecoverableError&)
{
fprintf(stderr, "Failed to write image %s\n", file.c_str());
success = false;
}
}
if (png_ptr)
png_destroy_write_struct(&png_ptr, info_ptr ? &info_ptr : nullptr);
fclose(fp);
if (png_ptr)
png_destroy_write_struct(&png_ptr, info_ptr ? &info_ptr : nullptr);
fclose(fp);
return success;
}
return success;
}
// Writes `image` as one or two PNG files, depending on `fmt`.
// `file` is expected to end in a 4-character extension (e.g. ".png"); that
// extension is stripped and replaced by the per-format suffixes from `pixel`.
// An out-of-range `compression` falls back to Z_BEST_SPEED.
// Returns true only if every file required by the format was written.
bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped)
{
	// Drop the trailing extension. A name shorter than 4 characters is used
	// unchanged: computing `length() - 4` on it would wrap around and make
	// std::string::replace throw std::out_of_range.
	std::string root = file;
	if (root.length() >= 4)
		root.resize(root.length() - 4);

	ASSERT(fmt >= Format::START && fmt < Format::COUNT);

	if (compression < 0 || compression > Z_BEST_COMPRESSION)
		compression = Z_BEST_SPEED;

	// Scratch row shared by both images; the first image never has fewer
	// output bytes per pixel than the second one in the `pixel` table.
	std::unique_ptr<uint8[]> row(new uint8[pixel[fmt].bytes_per_pixel_out * w]);

	std::string filename = root + pixel[fmt].extension[0];
	if (!SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression, rb_swapped, true))
		return false;

	// Second image
	if (pixel[fmt].extension[1] == nullptr)
		return true;

	filename = root + pixel[fmt].extension[1];
	return SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression);
}
// Captures the save parameters and takes a private, 32-byte aligned copy of
// the pitch * h source bytes. m_image stays null if the allocation fails.
Transaction::Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression)
	: m_fmt(fmt)
	, m_file(file)
	, m_w(w)
	, m_h(h)
	, m_pitch(pitch)
	, m_compression(compression)
{
	// Note: yes it would be better to use shared pointer
	const size_t byte_count = pitch * h;
	m_image = (uint8*)_aligned_malloc(byte_count, 32);
	if (m_image != nullptr)
		memcpy(m_image, image, byte_count);
}
// Releases the private image copy made by the constructor, if any.
Transaction::~Transaction()
{
	if (m_image != nullptr)
	{
		_aligned_free(m_image);
	}
}
// Job-queue callback: writes the image captured by a Transaction to disk.
void Process(std::shared_ptr<Transaction>& item)
{
	// The Transaction constructor leaves m_image null when its allocation
	// fails; Save() would read from that null pointer, so skip the job.
	if (item->m_image == nullptr)
	{
		fprintf(stderr, "Failed to write image %s\n", item->m_file.c_str());
		return;
	}

	Save(item->m_fmt, item->m_file, item->m_image, item->m_w, item->m_h, item->m_pitch, item->m_compression);
}
}
} // namespace GSPng

View File

@ -22,37 +22,39 @@
#include "GSThread_CXX11.h"
namespace GSPng
{
	// Output layouts understood by Save(). Each value selects one entry of the
	// per-format table in the implementation file.
	enum Format
	{
		START = 0,
		RGBA_PNG = 0, // one "_full.png" file
		RGB_PNG,      // one ".png" file
		RGB_A_PNG,    // ".png" plus a separate "_alpha.png"
		ALPHA_PNG,    // one "_alpha.png" file
		R8I_PNG,      // one "_R8I.png" file
		R16I_PNG,     // one "_R16I.png" file
		R32I_PNG,     // "_R32I_lsb.png" plus "_R32I_msb.png"
		COUNT
	};

	// A queued save request holding its own copy of the image data.
	class Transaction
	{
	public:
		Format m_fmt;
		const std::string m_file;
		uint8* m_image; // owned copy of the pixels; null if the allocation failed
		int m_w;
		int m_h;
		int m_pitch;
		int m_compression;

		Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression);
		~Transaction();

		// m_image is freed by the destructor, so a copy would free it twice.
		Transaction(const Transaction&) = delete;
		Transaction& operator=(const Transaction&) = delete;
	};

	// Writes `image` as the PNG file(s) required by `fmt`; returns false on failure.
	bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped = false);

	// Job callback that saves the image held by a Transaction.
	void Process(std::shared_ptr<Transaction>& item);

	using Worker = GSJobQueue<std::shared_ptr<Transaction>, 16>;
} // namespace GSPng

View File

@ -294,15 +294,15 @@ void GSState::ResetHandlers()
m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
// Installs every XYZ/XYZF handler for primitive type P with the given
// auto_flush setting. In the four-entry tables, slots 0/1 hold the XYZF2
// handlers and slots 2/3 the XYZ2 handlers, each instantiated with the second
// template argument set to 0 and 1 respectively.
#define SetHandlerXYZ(P, auto_flush) \
	m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2<P, 0, auto_flush>; \
	m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2<P, 1, auto_flush>; \
	m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2<P, 0, auto_flush>; \
	m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2<P, 1, auto_flush>; \
	m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2<P, 0, auto_flush>; \
	m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2<P, 1, auto_flush>; \
	m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2<P, 0, auto_flush>; \
	m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2<P, 1, auto_flush>; \
	m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2<P, auto_flush>; \
	m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2<P, auto_flush>;

View File

@ -195,7 +195,7 @@ protected:
void GrowVertexBuffer();
template<uint32 prim, bool auto_flush>
template <uint32 prim, bool auto_flush>
void VertexKick(uint32 skip);
// following functions need m_vt to be initialized
@ -269,16 +269,19 @@ public:
void InitReadFIFO(uint8* mem, int len);
void SoftReset(uint32 mask);
// Stores `csr` into the second 32-bit word (u32[1]) of the CSR register.
void WriteCSR(uint32 csr)
{
	m_regs->CSR.u32[1] = csr;
}
void ReadFIFO(uint8* mem, int size);
template<int index> void Transfer(const uint8* mem, uint32 size);
int Freeze(GSFreezeData* fd, bool sizeonly);
int Defrost(const GSFreezeData* fd);
// Hands the pending m_path3hack value to the caller, then clears it so the
// same value is not reported twice.
void GetLastTag(uint32* tag)
{
	*tag = m_path3hack;
	m_path3hack = 0;
}
virtual void SetGameCRC(uint32 crc, int options);
void SetFrameSkip(int skip);
void SetRegsMem(uint8* basemem);
void SetIrqCallback(void (*irq)());
void SetMultithreaded(bool mt = true);
};

View File

@ -72,7 +72,7 @@ public:
// http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/4th-gen-core-family-desktop-specification-update.pdf)
// This can cause builds for AVX2 CPUs to fail with GCC/Clang on Linux,
// so check that the RTM instructions are actually available.
#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__)
#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__)
int nretries = 0;
@ -104,7 +104,7 @@ public:
}
}
#endif
#endif
fallBackLock.lock();
}
@ -115,12 +115,12 @@ public:
{
fallBackLock.unlock();
}
#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__)
#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__)
else
{
_xend();
}
#endif
#endif
}
};
#endif

View File

@ -24,7 +24,8 @@
#include "GSdx.h"
#include "Utilities/boost_spsc_queue.hpp"
template<class T, int CAPACITY> class GSJobQueue final
template <class T, int CAPACITY>
class GSJobQueue final
{
private:
std::thread m_thread;
@ -37,12 +38,15 @@ private:
std::condition_variable m_empty;
std::condition_variable m_notempty;
void ThreadProc() {
void ThreadProc()
{
std::unique_lock<std::mutex> l(m_lock);
while (true) {
while (true)
{
while (m_queue.empty()) {
while (m_queue.empty())
{
if (m_exit)
return;
@ -64,9 +68,9 @@ private:
}
public:
// Stores the job callback and launches the worker thread. The thread is
// started from the constructor body, i.e. after all members are constructed.
GSJobQueue(std::function<void(T&)> func)
	: m_func(func)
	, m_exit(false)
{
	m_thread = std::thread([this] { ThreadProc(); });
}
@ -87,8 +91,9 @@ public:
return m_queue.empty();
}
void Push(const T& item) {
while(!m_queue.push(item))
void Push(const T& item)
{
while (!m_queue.push(item))
std::this_thread::yield();
{
@ -109,7 +114,8 @@ public:
assert(IsEmpty());
}
// Runs the job callback on `item`.
void operator()(T& item)
{
	m_func(item);
}
};

View File

@ -128,7 +128,7 @@ public:
memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField));
for(int i = 0; i < 64; i++)
for (int i = 0; i < 64; i++)
{
CompatibleBitsField[i][i >> 5] |= 1 << (i & 0x1f);
}
@ -211,7 +211,8 @@ bool GSUtil::CheckSSE()
{
bool status = true;
struct ISA {
struct ISA
{
Xbyak::util::Cpu::Type type;
const char* name;
};
@ -228,8 +229,10 @@ bool GSUtil::CheckSSE()
#endif
};
for (size_t i = 0; i < countof(checks); i++) {
if(!g_cpu.has(checks[i].type)) {
for (size_t i = 0; i < countof(checks); i++)
{
if (!g_cpu.has(checks[i].type))
{
fprintf(stderr, "This CPU does not support %s\n", checks[i].name);
status = false;
@ -284,12 +287,12 @@ bool GSUtil::CheckD3D11()
return s_D3D11 > 0;
}
D3D_FEATURE_LEVEL GSUtil::CheckDirect3D11Level(IDXGIAdapter *adapter, D3D_DRIVER_TYPE type)
D3D_FEATURE_LEVEL GSUtil::CheckDirect3D11Level(IDXGIAdapter* adapter, D3D_DRIVER_TYPE type)
{
HRESULT hr;
D3D_FEATURE_LEVEL level;
if(!CheckD3D11())
if (!CheckD3D11())
return (D3D_FEATURE_LEVEL)0;
hr = D3D11CreateDevice(adapter, type, NULL, 0, NULL, 0, D3D11_SDK_VERSION, NULL, &level, NULL);
@ -323,9 +326,11 @@ GSRendererType GSUtil::GetBestRenderer()
#ifdef _WIN32
void GSmkdir(const wchar_t* dir)
{
if (!CreateDirectory(dir, nullptr)) {
if (!CreateDirectory(dir, nullptr))
{
DWORD errorID = ::GetLastError();
if (errorID != ERROR_ALREADY_EXISTS) {
if (errorID != ERROR_ALREADY_EXISTS)
{
fprintf(stderr, "Failed to create directory: %ls error %u\n", dir, errorID);
}
}
@ -355,7 +360,8 @@ std::string GStempdir()
const char* psm_str(int psm)
{
switch(psm) {
switch (psm)
{
// Normal color
case PSM_PSMCT32: return "C_32";
case PSM_PSMCT24: return "C_24";
@ -375,7 +381,7 @@ const char* psm_str(int psm)
case PSM_PSMZ16: return "Z_16";
case PSM_PSMZ16S: return "Z_16S";
case PSM_PSGPU24: return "PS24";
case PSM_PSGPU24: return "PS24";
default:break;
}

View File

@ -48,7 +48,7 @@ public:
static bool CheckDXGI();
static bool CheckD3D11();
static GSRendererType GetBestRenderer();
static D3D_FEATURE_LEVEL CheckDirect3D11Level(IDXGIAdapter *adapter = NULL, D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE);
static D3D_FEATURE_LEVEL CheckDirect3D11Level(IDXGIAdapter* adapter = NULL, D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE);
#endif
};

View File

@ -168,23 +168,25 @@ GSVector4i GSVector4i::fit(int arx, int ary) const
{
GSVector4i r = *this;
if(arx > 0 && ary > 0)
if (arx > 0 && ary > 0)
{
int w = width();
int h = height();
if(w * ary > h * arx)
if (w * ary > h * arx)
{
w = h * arx / ary;
r.left = (r.left + r.right - w) >> 1;
if(r.left & 1) r.left++;
if (r.left & 1)
r.left++;
r.right = r.left + w;
}
else
{
h = w * ary / arx;
r.top = (r.top + r.bottom - h) >> 1;
if(r.top & 1) r.top++;
if (r.top & 1)
r.top++;
r.bottom = r.top + h;
}
@ -204,7 +206,7 @@ GSVector4i GSVector4i::fit(int preset) const
{
GSVector4i r;
if(preset > 0 && preset < (int)countof(s_ar))
if (preset > 0 && preset < (int)countof(s_ar))
{
r = fit(s_ar[preset][0], s_ar[preset][1]);
}

View File

@ -41,14 +41,15 @@ enum Round_Mode
#pragma pack(push, 1)
template<class T> class GSVector2T
template <class T>
class GSVector2T
{
public:
union
{
struct {T x, y;};
struct {T r, g;};
struct {T v[2];};
struct { T x, y; };
struct { T r, g; };
struct { T v[2]; };
};
GSVector2T()
@ -67,12 +68,12 @@ public:
this->y = y;
}
bool operator == (const GSVector2T& v) const
bool operator==(const GSVector2T& v) const
{
return x == v.x && y == v.y;
}
bool operator != (const GSVector2T& v) const
bool operator!=(const GSVector2T& v) const
{
return x != v.x || y != v.y;
}

View File

@ -52,9 +52,9 @@ class alignas(16) GSVector4
public:
union
{
struct {float x, y, z, w;};
struct {float r, g, b, a;};
struct {float left, top, right, bottom;};
struct { float x, y, z, w; };
struct { float r, g, b, a; };
struct { float left, top, right, bottom; };
float v[4];
float f32[4];
int8 i8[16];
@ -147,17 +147,17 @@ public:
// Converts the integer to float and broadcasts it to all four lanes.
__forceinline explicit GSVector4(int i)
{
#if _M_SSE >= 0x501

	m = _mm_cvtepi32_ps(_mm_broadcastd_epi32(_mm_cvtsi32_si128(i)));

#else

	// Without AVX2, go through the integer vector type and convert that.
	GSVector4i v((int)i);

	*this = GSVector4(v);

#endif
}
__forceinline explicit GSVector4(uint32 u)
@ -171,37 +171,37 @@ public:
__forceinline static GSVector4 cast(const GSVector4i& v);
#if _M_SSE >= 0x500
#if _M_SSE >= 0x500
__forceinline static GSVector4 cast(const GSVector8& v);
#endif
#endif
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
__forceinline static GSVector4 cast(const GSVector8i& v);
#endif
#endif
// Copies all four lanes from v.
__forceinline void operator=(const GSVector4& v)
{
	m = v.m;
}
// Broadcasts f to all four lanes.
__forceinline void operator=(float f)
{
#if _M_SSE >= 0x501

	m = _mm_broadcastss_ps(_mm_load_ss(&f));

#else

	m = _mm_set1_ps(f);

#endif
}
// Adopts a raw SSE register value; `this->` is required because the
// parameter shadows the member.
__forceinline void operator=(__m128 m)
{
	this->m = m;
}
@ -248,7 +248,8 @@ public:
return (v + v) - (v * v) * *this;
}
// Rounds every lane with _mm_round_ps using the compile-time rounding `mode`.
template <int mode>
__forceinline GSVector4 round() const
{
	return GSVector4(_mm_round_ps(m, mode));
}
@ -265,12 +266,12 @@ public:
// http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
// Horner-form polynomial evaluation built on madd():
// LOG_POLYn(x, c0, ..., cn) = c0 + x * (c1 + x * (... + x * cn)).
#define LOG_POLY0(x, c0) GSVector4(c0)
#define LOG_POLY1(x, c0, c1) (LOG_POLY0(x, c1).madd(x, GSVector4(c0)))
#define LOG_POLY2(x, c0, c1, c2) (LOG_POLY1(x, c1, c2).madd(x, GSVector4(c0)))
#define LOG_POLY3(x, c0, c1, c2, c3) (LOG_POLY2(x, c1, c2, c3).madd(x, GSVector4(c0)))
#define LOG_POLY4(x, c0, c1, c2, c3, c4) (LOG_POLY3(x, c1, c2, c3, c4).madd(x, GSVector4(c0)))
#define LOG_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector4(c0)))
__forceinline GSVector4 log2(int precision = 5) const
{
@ -290,21 +291,21 @@ public:
// Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
switch(precision)
switch (precision)
{
case 3:
p = LOG_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
break;
case 4:
p = LOG_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
break;
default:
case 5:
p = LOG_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
break;
case 6:
p = LOG_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f);
break;
case 3:
p = LOG_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
break;
case 4:
p = LOG_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
break;
default:
case 5:
p = LOG_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
break;
case 6:
p = LOG_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f);
break;
}
// This effectively increases the polynomial degree by one, but ensures that log2(1) == 0
@ -316,54 +317,54 @@ public:
// Returns (*this * a) + b. The fused-multiply-add path is disabled.
__forceinline GSVector4 madd(const GSVector4& a, const GSVector4& b) const
{
#if 0 //_M_SSE >= 0x501

	return GSVector4(_mm_fmadd_ps(m, a, b));

#else

	return *this * a + b;

#endif
}
// Returns (*this * a) - b. The fused path is disabled.
__forceinline GSVector4 msub(const GSVector4& a, const GSVector4& b) const
{
#if 0 //_M_SSE >= 0x501

	return GSVector4(_mm_fmsub_ps(m, a, b));

#else

	return *this * a - b;

#endif
}
// Returns b - (*this * a). The fused path is disabled.
__forceinline GSVector4 nmadd(const GSVector4& a, const GSVector4& b) const
{
#if 0 //_M_SSE >= 0x501

	return GSVector4(_mm_fnmadd_ps(m, a, b));

#else

	return b - *this * a;

#endif
}
// Returns -b - (*this * a). The fused path is disabled.
__forceinline GSVector4 nmsub(const GSVector4& a, const GSVector4& b) const
{
#if 0 //_M_SSE >= 0x501

	return GSVector4(_mm_fnmsub_ps(m, a, b));

#else

	return -b - *this * a;

#endif
}
__forceinline GSVector4 addm(const GSVector4& a, const GSVector4& b) const
@ -396,7 +397,8 @@ public:
return GSVector4(_mm_hsub_ps(m, v.m));
}
// Dot product via _mm_dp_ps; `i` is the intrinsic's immediate control mask.
template <int i>
__forceinline GSVector4 dp(const GSVector4& v) const
{
	return GSVector4(_mm_dp_ps(m, v.m, i));
}
@ -431,12 +433,13 @@ public:
return GSVector4(_mm_max_ps(m, a));
}
// Lane select with a compile-time mask (_mm_blend_ps): a set mask bit takes
// the lane from `a`, a clear bit keeps the lane of *this.
template <int mask>
__forceinline GSVector4 blend32(const GSVector4& a) const
{
	return GSVector4(_mm_blend_ps(m, a, mask));
}
// Lane select with a run-time mask vector (_mm_blendv_ps): lanes whose mask
// sign bit is set come from `a`, the others from *this.
__forceinline GSVector4 blend32(const GSVector4& a, const GSVector4& mask) const
{
	return GSVector4(_mm_blendv_ps(m, a, mask));
}
@ -488,7 +491,7 @@ public:
__forceinline bool allfalse() const
{
#if _M_SSE >= 0x500
#if _M_SSE >= 0x500
return _mm_testz_ps(m, m) != 0;
@ -498,7 +501,7 @@ public:
return _mm_testz_si128(a, a) != 0;
#endif
#endif
}
__forceinline GSVector4 replace_nan(const GSVector4& v) const
@ -506,65 +509,66 @@ public:
return v.blend32(*this, *this == *this);
}
// Returns a copy of *this with lane `dst` replaced by lane `src` of v (the
// operation the disabled _mm_insert_ps branch performs). The active branch
// builds the result from two shuffles per (dst, src) pair; both are template
// constants, so only one return statement survives compilation.
template <int src, int dst>
__forceinline GSVector4 insert32(const GSVector4& v) const
{
	// TODO: use blendps when src == dst

#if 0 // _M_SSE >= 0x401

	// NOTE: it's faster with shuffles...

	return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0)));

#else

	switch (dst)
	{
		case 0:
			switch (src)
			{
				case 0: return yyxx(v).zxzw(*this);
				case 1: return yyyy(v).zxzw(*this);
				case 2: return yyzz(v).zxzw(*this);
				case 3: return yyww(v).zxzw(*this);
				default: __assume(0);
			}
			break;
		case 1:
			switch (src)
			{
				case 0: return xxxx(v).xzzw(*this);
				case 1: return xxyy(v).xzzw(*this);
				case 2: return xxzz(v).xzzw(*this);
				case 3: return xxww(v).xzzw(*this);
				default: __assume(0);
			}
			break;
		case 2:
			switch (src)
			{
				case 0: return xyzx(wwxx(v));
				case 1: return xyzx(wwyy(v));
				case 2: return xyzx(wwzz(v));
				case 3: return xyzx(wwww(v));
				default: __assume(0);
			}
			break;
		case 3:
			switch (src)
			{
				case 0: return xyxz(zzxx(v));
				case 1: return xyxz(zzyy(v));
				case 2: return xyxz(zzzz(v));
				case 3: return xyxz(zzww(v));
				default: __assume(0);
			}
			break;
		default:
			__assume(0);
	}

#endif
}
#ifdef __linux__
@ -582,12 +586,14 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
template<int i> __forceinline int extract32() const
#endif
// Returns the raw 32-bit pattern of lane `index` as an int (no float-to-int
// conversion is performed).
template <int index>
__forceinline int extract32() const
{
	return _mm_extract_ps(m, index);
}
#else
// Returns the raw 32-bit pattern of lane `i` as an int (no float-to-int
// conversion is performed).
template <int i>
__forceinline int extract32() const
{
	return _mm_extract_ps(m, i);
}
@ -630,7 +636,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
}
// Loads four floats from p, using the aligned load when `aligned` is true
// and the unaligned load otherwise.
template <bool aligned>
__forceinline static GSVector4 load(const void* p)
{
	const float* const src = (const float*)p;
	return GSVector4(aligned ? _mm_load_ps(src) : _mm_loadu_ps(src));
}
@ -650,10 +657,13 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
_mm_storeh_pd((double*)p, _mm_castps_pd(v.m));
}
// Stores the four lanes of v to p, using the aligned store when `aligned`
// is true and the unaligned store otherwise.
template <bool aligned>
__forceinline static void store(void* p, const GSVector4& v)
{
	float* const dst = (float*)p;
	if (aligned)
		_mm_store_ps(dst, v.m);
	else
		_mm_storeu_ps(dst, v.m);
}
__forceinline static void store(float* p, const GSVector4& v)
@ -710,156 +720,157 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
b = v2.h2l(v0);
c = v1.l2h(v3);
d = v3.h2l(v1);
*/ }
*/
}
__forceinline GSVector4 operator - () const
// Unary minus: forwards to neg().
__forceinline GSVector4 operator-() const
{
	const GSVector4 negated = neg();
	return negated;
}
__forceinline void operator += (const GSVector4& v)
// Adds v to this vector in place (_mm_add_ps).
__forceinline void operator+=(const GSVector4& v)
{
	const __m128 sum = _mm_add_ps(m, v);
	m = sum;
}
__forceinline void operator -= (const GSVector4& v)
// Subtracts v from this vector in place (_mm_sub_ps).
__forceinline void operator-=(const GSVector4& v)
{
	const __m128 difference = _mm_sub_ps(m, v);
	m = difference;
}
__forceinline void operator *= (const GSVector4& v)
// Multiplies this vector by v in place (_mm_mul_ps).
__forceinline void operator*=(const GSVector4& v)
{
	const __m128 product = _mm_mul_ps(m, v);
	m = product;
}
__forceinline void operator /= (const GSVector4& v)
// Divides this vector by v in place (_mm_div_ps).
__forceinline void operator/=(const GSVector4& v)
{
	const __m128 quotient = _mm_div_ps(m, v);
	m = quotient;
}
__forceinline void operator += (float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator+=.
__forceinline void operator+=(float f)
{
	const GSVector4 rhs(f);
	*this += rhs;
}
__forceinline void operator -= (float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator-=.
__forceinline void operator-=(float f)
{
	const GSVector4 rhs(f);
	*this -= rhs;
}
__forceinline void operator *= (float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator*=.
__forceinline void operator*=(float f)
{
	const GSVector4 rhs(f);
	*this *= rhs;
}
__forceinline void operator /= (float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator/=.
__forceinline void operator/=(float f)
{
	const GSVector4 rhs(f);
	*this /= rhs;
}
__forceinline void operator &= (const GSVector4& v)
// Bitwise AND with v, in place (_mm_and_ps).
__forceinline void operator&=(const GSVector4& v)
{
	const __m128 masked = _mm_and_ps(m, v);
	m = masked;
}
__forceinline void operator |= (const GSVector4& v)
// Bitwise OR with v, in place (_mm_or_ps).
__forceinline void operator|=(const GSVector4& v)
{
	const __m128 merged = _mm_or_ps(m, v);
	m = merged;
}
__forceinline void operator ^= (const GSVector4& v)
// Bitwise XOR with v, in place (_mm_xor_ps).
__forceinline void operator^=(const GSVector4& v)
{
	const __m128 toggled = _mm_xor_ps(m, v);
	m = toggled;
}
__forceinline friend GSVector4 operator + (const GSVector4& v1, const GSVector4& v2)
// Returns v1 + v2 computed with _mm_add_ps.
__forceinline friend GSVector4 operator+(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 sum = _mm_add_ps(v1, v2);
	return GSVector4(sum);
}
__forceinline friend GSVector4 operator - (const GSVector4& v1, const GSVector4& v2)
// Returns v1 - v2 computed with _mm_sub_ps.
__forceinline friend GSVector4 operator-(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 difference = _mm_sub_ps(v1, v2);
	return GSVector4(difference);
}
__forceinline friend GSVector4 operator * (const GSVector4& v1, const GSVector4& v2)
// Returns v1 * v2 computed with _mm_mul_ps.
__forceinline friend GSVector4 operator*(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 product = _mm_mul_ps(v1, v2);
	return GSVector4(product);
}
__forceinline friend GSVector4 operator / (const GSVector4& v1, const GSVector4& v2)
// Returns v1 / v2 computed with _mm_div_ps.
__forceinline friend GSVector4 operator/(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 quotient = _mm_div_ps(v1, v2);
	return GSVector4(quotient);
}
__forceinline friend GSVector4 operator + (const GSVector4& v, float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator+.
__forceinline friend GSVector4 operator+(const GSVector4& v, float f)
{
	const GSVector4 rhs(f);
	return v + rhs;
}
__forceinline friend GSVector4 operator - (const GSVector4& v, float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator-.
__forceinline friend GSVector4 operator-(const GSVector4& v, float f)
{
	const GSVector4 rhs(f);
	return v - rhs;
}
__forceinline friend GSVector4 operator * (const GSVector4& v, float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator*.
__forceinline friend GSVector4 operator*(const GSVector4& v, float f)
{
	const GSVector4 rhs(f);
	return v * rhs;
}
__forceinline friend GSVector4 operator / (const GSVector4& v, float f)
// Scalar form: builds a GSVector4 from f and forwards to the vector operator/.
__forceinline friend GSVector4 operator/(const GSVector4& v, float f)
{
	const GSVector4 rhs(f);
	return v / rhs;
}
__forceinline friend GSVector4 operator & (const GSVector4& v1, const GSVector4& v2)
// Returns the bitwise AND of v1 and v2 (_mm_and_ps).
__forceinline friend GSVector4 operator&(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 masked = _mm_and_ps(v1, v2);
	return GSVector4(masked);
}
__forceinline friend GSVector4 operator | (const GSVector4& v1, const GSVector4& v2)
// Returns the bitwise OR of v1 and v2 (_mm_or_ps).
__forceinline friend GSVector4 operator|(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 merged = _mm_or_ps(v1, v2);
	return GSVector4(merged);
}
__forceinline friend GSVector4 operator ^ (const GSVector4& v1, const GSVector4& v2)
// Returns the bitwise XOR of v1 and v2 (_mm_xor_ps).
__forceinline friend GSVector4 operator^(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 toggled = _mm_xor_ps(v1, v2);
	return GSVector4(toggled);
}
__forceinline friend GSVector4 operator == (const GSVector4& v1, const GSVector4& v2)
// Returns the _mm_cmpeq_ps comparison result of v1 and v2 as a GSVector4.
__forceinline friend GSVector4 operator==(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 mask = _mm_cmpeq_ps(v1, v2);
	return GSVector4(mask);
}
__forceinline friend GSVector4 operator != (const GSVector4& v1, const GSVector4& v2)
// Returns the _mm_cmpneq_ps comparison result of v1 and v2 as a GSVector4.
__forceinline friend GSVector4 operator!=(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 mask = _mm_cmpneq_ps(v1, v2);
	return GSVector4(mask);
}
__forceinline friend GSVector4 operator > (const GSVector4& v1, const GSVector4& v2)
// Returns the _mm_cmpgt_ps comparison result of v1 and v2 as a GSVector4.
__forceinline friend GSVector4 operator>(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 mask = _mm_cmpgt_ps(v1, v2);
	return GSVector4(mask);
}
__forceinline friend GSVector4 operator < (const GSVector4& v1, const GSVector4& v2)
// Returns the _mm_cmplt_ps comparison result of v1 and v2 as a GSVector4.
__forceinline friend GSVector4 operator<(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 mask = _mm_cmplt_ps(v1, v2);
	return GSVector4(mask);
}
__forceinline friend GSVector4 operator >= (const GSVector4& v1, const GSVector4& v2)
// Returns the _mm_cmpge_ps comparison result of v1 and v2 as a GSVector4.
__forceinline friend GSVector4 operator>=(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 mask = _mm_cmpge_ps(v1, v2);
	return GSVector4(mask);
}
__forceinline friend GSVector4 operator <= (const GSVector4& v1, const GSVector4& v2)
// Returns the _mm_cmple_ps comparison result of v1 and v2 as a GSVector4.
__forceinline friend GSVector4 operator<=(const GSVector4& v1, const GSVector4& v2)
{
	const __m128 mask = _mm_cmple_ps(v1, v2);
	return GSVector4(mask);
}
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
__forceinline GSVector4 xs##ys##zs##ws() const {return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
__forceinline GSVector4 xs##ys##zs##ws(const GSVector4& v) const {return GSVector4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
__forceinline GSVector4 xs##ys##zs##ws() const { return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn))); } \
__forceinline GSVector4 xs##ys##zs##ws(const GSVector4& v) const { return GSVector4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn))); } \
#define VECTOR4_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \
@ -884,7 +895,7 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
VECTOR4_SHUFFLE_1(z, 2)
VECTOR4_SHUFFLE_1(w, 3)
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
__forceinline GSVector4 broadcast32() const
{
@ -901,5 +912,5 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
return GSVector4(_mm_broadcastss_ps(_mm_load_ss((const float*)f)));
}
#endif
#endif
};

File diff suppressed because it is too large Load Diff

View File

@ -62,8 +62,8 @@ class alignas(32) GSVector8
public:
union
{
struct {float x0, y0, z0, w0, x1, y1, z1, w1;};
struct {float r0, g0, b0, a0, r1, g1, b1, a1;};
struct { float x0, y0, z0, w0, x1, y1, z1, w1; };
struct { float r0, g0, b0, a0, r1, g1, b1, a1; };
float v[8];
float f32[8];
int8 i8[32];
@ -126,15 +126,15 @@ public:
__forceinline GSVector8(__m128 m0, __m128 m1)
{
#if 0 // _MSC_VER >= 1700
#if 0 // _MSC_VER >= 1700
this->m = _mm256_permute2f128_ps(_mm256_castps128_ps256(m0), _mm256_castps128_ps256(m1), 0x20);
#else
#else
this->m = zero().insert<0>(m0).insert<1>(m1);
#endif
#endif
}
constexpr GSVector8(const GSVector8& v) = default;
@ -146,17 +146,17 @@ public:
__forceinline explicit GSVector8(int i)
{
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
m = _mm256_cvtepi32_ps(_mm256_broadcastd_epi32(_mm_cvtsi32_si128(i)));
#else
#else
GSVector4i v((int)i);
*this = GSVector4(v);
#endif
#endif
}
__forceinline explicit GSVector8(__m128 m)
@ -169,41 +169,41 @@ public:
{
}
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
__forceinline explicit GSVector8(const GSVector8i& v);
__forceinline static GSVector8 cast(const GSVector8i& v);
#endif
#endif
__forceinline static GSVector8 cast(const GSVector4& v);
__forceinline static GSVector8 cast(const GSVector4i& v);
__forceinline void operator = (const GSVector8& v)
// Copies the 256-bit register of v into this vector.
__forceinline void operator=(const GSVector8& v)
{
	this->m = v.m;
}
__forceinline void operator = (float f)
__forceinline void operator=(float f)
{
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
m = _mm256_broadcastss_ps(_mm_load_ss(&f));
m = _mm256_broadcastss_ps(_mm_load_ss(&f));
#else
#else
m = _mm256_set1_ps(f);
#endif
#endif
}
__forceinline void operator = (__m128 m)
// Assigns the 128-bit value m to both halves of this 256-bit vector.
__forceinline void operator=(__m128 m)
{
	const __m256 low = _mm256_castps128_ps256(m);
	this->m = _mm256_insertf128_ps(low, m, 1); // index 1 writes m into the upper half as well
}
__forceinline void operator = (__m256 m)
// Stores the raw 256-bit register; the parameter shadows the member, hence the explicit this->.
__forceinline void operator=(__m256 m)
{
	const __m256 value = m;
	this->m = value;
}
@ -215,28 +215,28 @@ public:
__forceinline GSVector8 abs() const
{
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
return *this & cast(GSVector8i::x7fffffff());
#else
#else
return *this & m_x7fffffff;
#endif
#endif
}
__forceinline GSVector8 neg() const
{
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
return *this ^ cast(GSVector8i::x80000000());
#else
#else
return *this ^ m_x80000000;
#endif
#endif
}
__forceinline GSVector8 rcp() const
@ -251,7 +251,8 @@ public:
return (v + v) - (v * v) * *this;
}
template<int mode> __forceinline GSVector8 round() const
// Rounds with _mm256_round_ps using the compile-time rounding `mode`.
template <int mode>
__forceinline GSVector8 round() const
{
	const __m256 rounded = _mm256_round_ps(m, mode);
	return GSVector8(rounded);
}
@ -266,14 +267,14 @@ public:
return round<Round_PosInf>();
}
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
#define LOG8_POLY0(x, c0) GSVector8(c0)
#define LOG8_POLY1(x, c0, c1) (LOG8_POLY0(x, c1).madd(x, GSVector8(c0)))
#define LOG8_POLY2(x, c0, c1, c2) (LOG8_POLY1(x, c1, c2).madd(x, GSVector8(c0)))
#define LOG8_POLY3(x, c0, c1, c2, c3) (LOG8_POLY2(x, c1, c2, c3).madd(x, GSVector8(c0)))
#define LOG8_POLY4(x, c0, c1, c2, c3, c4) (LOG8_POLY3(x, c1, c2, c3, c4).madd(x, GSVector8(c0)))
#define LOG8_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG8_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector8(c0)))
#define LOG8_POLY0(x, c0) GSVector8(c0)
#define LOG8_POLY1(x, c0, c1) (LOG8_POLY0(x, c1).madd(x, GSVector8(c0)))
#define LOG8_POLY2(x, c0, c1, c2) (LOG8_POLY1(x, c1, c2).madd(x, GSVector8(c0)))
#define LOG8_POLY3(x, c0, c1, c2, c3) (LOG8_POLY2(x, c1, c2, c3).madd(x, GSVector8(c0)))
#define LOG8_POLY4(x, c0, c1, c2, c3, c4) (LOG8_POLY3(x, c1, c2, c3, c4).madd(x, GSVector8(c0)))
#define LOG8_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG8_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector8(c0)))
__forceinline GSVector8 log2(int precision = 5) const
{
@ -288,21 +289,21 @@ public:
GSVector8 p;
switch(precision)
switch (precision)
{
case 3:
p = LOG8_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
break;
case 4:
p = LOG8_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
break;
default:
case 5:
p = LOG8_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
break;
case 6:
p = LOG8_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f);
break;
case 3:
p = LOG8_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
break;
case 4:
p = LOG8_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
break;
default:
case 5:
p = LOG8_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
break;
case 6:
p = LOG8_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f);
break;
}
// This effectively increases the polynomial degree by one, but ensures that log2(1) == 0
@ -312,58 +313,58 @@ public:
return p + e;
}
#endif
#endif
__forceinline GSVector8 madd(const GSVector8& a, const GSVector8& b) const
{
#if 0//_M_SSE >= 0x501
#if 0 //_M_SSE >= 0x501
return GSVector8(_mm256_fmadd_ps(m, a, b));
#else
#else
return *this * a + b;
#endif
#endif
}
__forceinline GSVector8 msub(const GSVector8& a, const GSVector8& b) const
{
#if 0//_M_SSE >= 0x501
#if 0 //_M_SSE >= 0x501
return GSVector8(_mm256_fmsub_ps(m, a, b));
#else
#else
return *this * a - b;
#endif
#endif
}
__forceinline GSVector8 nmadd(const GSVector8& a, const GSVector8& b) const
{
#if 0//_M_SSE >= 0x501
#if 0 //_M_SSE >= 0x501
return GSVector8(_mm256_fnmadd_ps(m, a, b));
#else
#else
return b - *this * a;
#endif
#endif
}
__forceinline GSVector8 nmsub(const GSVector8& a, const GSVector8& b) const
{
#if 0//_M_SSE >= 0x501
#if 0 //_M_SSE >= 0x501
return GSVector8(_mm256_fnmsub_ps(m, a, b));
#else
#else
return -b - *this * a;
#endif
#endif
}
__forceinline GSVector8 addm(const GSVector8& a, const GSVector8& b) const
@ -396,7 +397,8 @@ public:
return GSVector8(_mm256_hsub_ps(m, v.m));
}
template<int i> __forceinline GSVector8 dp(const GSVector8& v) const
// Dot product of this vector and v via _mm256_dp_ps; `i` is passed through as the immediate mask.
template <int i>
__forceinline GSVector8 dp(const GSVector8& v) const
{
	const __m256 dot = _mm256_dp_ps(m, v.m, i);
	return GSVector8(dot);
}
@ -431,12 +433,13 @@ public:
return GSVector8(_mm256_max_ps(m, a));
}
template<int mask> __forceinline GSVector8 blend32(const GSVector8& a) const
// Blends this vector with a via _mm256_blend_ps using the compile-time `mask`.
template <int mask>
__forceinline GSVector8 blend32(const GSVector8& a) const
{
	const __m256 blended = _mm256_blend_ps(m, a, mask);
	return GSVector8(blended);
}
__forceinline GSVector8 blend32(const GSVector8& a, const GSVector8& mask) const
// Blends this vector with a via _mm256_blendv_ps, selecting with the runtime vector `mask`.
__forceinline GSVector8 blend32(const GSVector8& a, const GSVector8& mask) const
{
	const __m256 blended = _mm256_blendv_ps(m, a, mask);
	return GSVector8(blended);
}
@ -496,80 +499,85 @@ public:
return v.blend32(*this, *this == *this);
}
template<int src, int dst> __forceinline GSVector8 insert32(const GSVector8& v) const
template <int src, int dst>
__forceinline GSVector8 insert32(const GSVector8& v) const
{
// TODO: use blendps when src == dst
ASSERT(src < 4 && dst < 4); // not cross lane like extract32()
switch(dst)
switch (dst)
{
case 0:
switch(src)
{
case 0: return yyxx(v).zxzw(*this);
case 1: return yyyy(v).zxzw(*this);
case 2: return yyzz(v).zxzw(*this);
case 3: return yyww(v).zxzw(*this);
default: __assume(0);
}
break;
case 1:
switch(src)
{
case 0: return xxxx(v).xzzw(*this);
case 1: return xxyy(v).xzzw(*this);
case 2: return xxzz(v).xzzw(*this);
case 3: return xxww(v).xzzw(*this);
default: __assume(0);
}
break;
case 2:
switch(src)
{
case 0: return xyzx(wwxx(v));
case 1: return xyzx(wwyy(v));
case 2: return xyzx(wwzz(v));
case 3: return xyzx(wwww(v));
default: __assume(0);
}
break;
case 3:
switch(src)
{
case 0: return xyxz(zzxx(v));
case 1: return xyxz(zzyy(v));
case 2: return xyxz(zzzz(v));
case 3: return xyxz(zzww(v));
default: __assume(0);
}
break;
default:
__assume(0);
case 0:
switch (src)
{
case 0: return yyxx(v).zxzw(*this);
case 1: return yyyy(v).zxzw(*this);
case 2: return yyzz(v).zxzw(*this);
case 3: return yyww(v).zxzw(*this);
default: __assume(0);
}
break;
case 1:
switch (src)
{
case 0: return xxxx(v).xzzw(*this);
case 1: return xxyy(v).xzzw(*this);
case 2: return xxzz(v).xzzw(*this);
case 3: return xxww(v).xzzw(*this);
default: __assume(0);
}
break;
case 2:
switch (src)
{
case 0: return xyzx(wwxx(v));
case 1: return xyzx(wwyy(v));
case 2: return xyzx(wwzz(v));
case 3: return xyzx(wwww(v));
default: __assume(0);
}
break;
case 3:
switch (src)
{
case 0: return xyxz(zzxx(v));
case 1: return xyxz(zzyy(v));
case 2: return xyxz(zzzz(v));
case 3: return xyxz(zzww(v));
default: __assume(0);
}
break;
default:
__assume(0);
}
return *this;
}
template<int i> __forceinline int extract32() const
// Returns 32-bit lane i (0..7): picks the 128-bit half i / 4, then lane i & 3 within it.
template <int i>
__forceinline int extract32() const
{
	ASSERT(i < 8);

	const GSVector4 half = extract<i / 4>();
	return half.template extract32<i & 3>();
}
template<int i> __forceinline GSVector8 insert(__m128 m) const
// Returns a copy of this vector with 128-bit half i (0 or 1) replaced by m.
template <int i>
__forceinline GSVector8 insert(__m128 m) const
{
	ASSERT(i < 2);

	// The parameter shadows the member, so the member is reached through this->.
	const __m256 merged = _mm256_insertf128_ps(this->m, m, i);
	return GSVector8(merged);
}
template<int i> __forceinline GSVector4 extract() const
template <int i>
__forceinline GSVector4 extract() const
{
ASSERT(i < 2);
if(i == 0) return GSVector4(_mm256_castps256_ps128(m));
if (i == 0)
return GSVector4(_mm256_castps256_ps128(m));
return GSVector4(_mm256_extractf128_ps(m, i));
}
@ -606,7 +614,8 @@ public:
return loadh(ph, loadl(pl));
}
template<bool aligned> __forceinline static GSVector8 load(const void* p)
// Loads eight floats from p; the `aligned` template flag selects _mm256_load_ps over _mm256_loadu_ps.
template <bool aligned>
__forceinline static GSVector8 load(const void* p)
{
	const float* src = static_cast<const float*>(p);
	if (aligned)
		return GSVector8(_mm256_load_ps(src));
	return GSVector8(_mm256_loadu_ps(src));
}
@ -623,10 +632,13 @@ public:
_mm_store_ps((float*)p, _mm256_extractf128_ps(v.m, 1));
}
template<bool aligned> __forceinline static void store(void* p, const GSVector8& v)
template <bool aligned>
__forceinline static void store(void* p, const GSVector8& v)
{
if(aligned) _mm256_store_ps((float*)p, v.m);
else _mm256_storeu_ps((float*)p, v.m);
if (aligned)
_mm256_store_ps((float*)p, v.m);
else
_mm256_storeu_ps((float*)p, v.m);
}
//
@ -643,147 +655,147 @@ public:
//
__forceinline GSVector8 operator - () const
// Unary minus: forwards to neg().
__forceinline GSVector8 operator-() const
{
	const GSVector8 negated = neg();
	return negated;
}
__forceinline void operator += (const GSVector8& v)
// Adds v to this vector in place (_mm256_add_ps).
__forceinline void operator+=(const GSVector8& v)
{
	const __m256 sum = _mm256_add_ps(m, v);
	m = sum;
}
__forceinline void operator -= (const GSVector8& v)
// Subtracts v from this vector in place (_mm256_sub_ps).
__forceinline void operator-=(const GSVector8& v)
{
	const __m256 difference = _mm256_sub_ps(m, v);
	m = difference;
}
__forceinline void operator *= (const GSVector8& v)
// Multiplies this vector by v in place (_mm256_mul_ps).
__forceinline void operator*=(const GSVector8& v)
{
	const __m256 product = _mm256_mul_ps(m, v);
	m = product;
}
__forceinline void operator /= (const GSVector8& v)
// Divides this vector by v in place (_mm256_div_ps).
__forceinline void operator/=(const GSVector8& v)
{
	const __m256 quotient = _mm256_div_ps(m, v);
	m = quotient;
}
__forceinline void operator += (float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator+=.
__forceinline void operator+=(float f)
{
	const GSVector8 rhs(f);
	*this += rhs;
}
__forceinline void operator -= (float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator-=.
__forceinline void operator-=(float f)
{
	const GSVector8 rhs(f);
	*this -= rhs;
}
__forceinline void operator *= (float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator*=.
__forceinline void operator*=(float f)
{
	const GSVector8 rhs(f);
	*this *= rhs;
}
__forceinline void operator /= (float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator/=.
__forceinline void operator/=(float f)
{
	const GSVector8 rhs(f);
	*this /= rhs;
}
__forceinline void operator &= (const GSVector8& v)
// Bitwise AND with v, in place (_mm256_and_ps).
__forceinline void operator&=(const GSVector8& v)
{
	const __m256 masked = _mm256_and_ps(m, v);
	m = masked;
}
__forceinline void operator |= (const GSVector8& v)
// Bitwise OR with v, in place (_mm256_or_ps).
__forceinline void operator|=(const GSVector8& v)
{
	const __m256 merged = _mm256_or_ps(m, v);
	m = merged;
}
__forceinline void operator ^= (const GSVector8& v)
// Bitwise XOR with v, in place (_mm256_xor_ps).
__forceinline void operator^=(const GSVector8& v)
{
	const __m256 toggled = _mm256_xor_ps(m, v);
	m = toggled;
}
__forceinline friend GSVector8 operator + (const GSVector8& v1, const GSVector8& v2)
// Returns v1 + v2 computed with _mm256_add_ps.
__forceinline friend GSVector8 operator+(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 sum = _mm256_add_ps(v1, v2);
	return GSVector8(sum);
}
__forceinline friend GSVector8 operator - (const GSVector8& v1, const GSVector8& v2)
// Returns v1 - v2 computed with _mm256_sub_ps.
__forceinline friend GSVector8 operator-(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 difference = _mm256_sub_ps(v1, v2);
	return GSVector8(difference);
}
__forceinline friend GSVector8 operator * (const GSVector8& v1, const GSVector8& v2)
// Returns v1 * v2 computed with _mm256_mul_ps.
__forceinline friend GSVector8 operator*(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 product = _mm256_mul_ps(v1, v2);
	return GSVector8(product);
}
__forceinline friend GSVector8 operator / (const GSVector8& v1, const GSVector8& v2)
// Returns v1 / v2 computed with _mm256_div_ps.
__forceinline friend GSVector8 operator/(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 quotient = _mm256_div_ps(v1, v2);
	return GSVector8(quotient);
}
__forceinline friend GSVector8 operator + (const GSVector8& v, float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator+.
__forceinline friend GSVector8 operator+(const GSVector8& v, float f)
{
	const GSVector8 rhs(f);
	return v + rhs;
}
__forceinline friend GSVector8 operator - (const GSVector8& v, float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator-.
__forceinline friend GSVector8 operator-(const GSVector8& v, float f)
{
	const GSVector8 rhs(f);
	return v - rhs;
}
__forceinline friend GSVector8 operator * (const GSVector8& v, float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator*.
__forceinline friend GSVector8 operator*(const GSVector8& v, float f)
{
	const GSVector8 rhs(f);
	return v * rhs;
}
__forceinline friend GSVector8 operator / (const GSVector8& v, float f)
// Scalar form: builds a GSVector8 from f and forwards to the vector operator/.
__forceinline friend GSVector8 operator/(const GSVector8& v, float f)
{
	const GSVector8 rhs(f);
	return v / rhs;
}
__forceinline friend GSVector8 operator & (const GSVector8& v1, const GSVector8& v2)
// Returns the bitwise AND of v1 and v2 (_mm256_and_ps).
__forceinline friend GSVector8 operator&(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 masked = _mm256_and_ps(v1, v2);
	return GSVector8(masked);
}
__forceinline friend GSVector8 operator | (const GSVector8& v1, const GSVector8& v2)
// Returns the bitwise OR of v1 and v2 (_mm256_or_ps).
__forceinline friend GSVector8 operator|(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 merged = _mm256_or_ps(v1, v2);
	return GSVector8(merged);
}
__forceinline friend GSVector8 operator ^ (const GSVector8& v1, const GSVector8& v2)
// Returns the bitwise XOR of v1 and v2 (_mm256_xor_ps).
__forceinline friend GSVector8 operator^(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 toggled = _mm256_xor_ps(v1, v2);
	return GSVector8(toggled);
}
__forceinline friend GSVector8 operator == (const GSVector8& v1, const GSVector8& v2)
// Returns the _mm256_cmp_ps result for the _CMP_EQ_OQ predicate as a GSVector8.
__forceinline friend GSVector8 operator==(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 mask = _mm256_cmp_ps(v1, v2, _CMP_EQ_OQ);
	return GSVector8(mask);
}
__forceinline friend GSVector8 operator != (const GSVector8& v1, const GSVector8& v2)
// Lane-wise inequality mask of v1 and v2.
// Uses the unordered predicate (_CMP_NEQ_UQ) so that a lane containing NaN compares as
// "not equal". This matches C++ `!=` on floats and the 128-bit _mm_cmpneq_ps used by
// GSVector4::operator!=; the ordered predicate would report NaN lanes as equal.
__forceinline friend GSVector8 operator!=(const GSVector8& v1, const GSVector8& v2)
{
	return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_NEQ_UQ));
}
__forceinline friend GSVector8 operator > (const GSVector8& v1, const GSVector8& v2)
// Returns the _mm256_cmp_ps result for the _CMP_GT_OQ predicate as a GSVector8.
__forceinline friend GSVector8 operator>(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 mask = _mm256_cmp_ps(v1, v2, _CMP_GT_OQ);
	return GSVector8(mask);
}
__forceinline friend GSVector8 operator < (const GSVector8& v1, const GSVector8& v2)
// Returns the _mm256_cmp_ps result for the _CMP_LT_OQ predicate as a GSVector8.
__forceinline friend GSVector8 operator<(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 mask = _mm256_cmp_ps(v1, v2, _CMP_LT_OQ);
	return GSVector8(mask);
}
__forceinline friend GSVector8 operator >= (const GSVector8& v1, const GSVector8& v2)
// Returns the _mm256_cmp_ps result for the _CMP_GE_OQ predicate as a GSVector8.
__forceinline friend GSVector8 operator>=(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 mask = _mm256_cmp_ps(v1, v2, _CMP_GE_OQ);
	return GSVector8(mask);
}
__forceinline friend GSVector8 operator <= (const GSVector8& v1, const GSVector8& v2)
// Returns the _mm256_cmp_ps result for the _CMP_LE_OQ predicate as a GSVector8.
__forceinline friend GSVector8 operator<=(const GSVector8& v1, const GSVector8& v2)
{
	const __m256 mask = _mm256_cmp_ps(v1, v2, _CMP_LE_OQ);
	return GSVector8(mask);
}
@ -795,11 +807,11 @@ public:
#define VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
__forceinline GSVector8 xs##ys##zs##ws() const {return GSVector8(_mm256_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
__forceinline GSVector8 xs##ys##zs##ws(const GSVector8& v) const {return GSVector8(_mm256_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));}
__forceinline GSVector8 xs##ys##zs##ws() const { return GSVector8(_mm256_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn))); } \
__forceinline GSVector8 xs##ys##zs##ws(const GSVector8& v) const { return GSVector8(_mm256_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn))); }
// vs2012u3 cannot reuse the result of equivalent shuffles when it is done with _mm256_permute_ps (write v.xxxx() twice, and it will do it twice), but with _mm256_shuffle_ps it can.
//__forceinline GSVector8 xs##ys##zs##ws() const {return GSVector8(_mm256_permute_ps(m, _MM_SHUFFLE(wn, zn, yn, xn)));}
//__forceinline GSVector8 xs##ys##zs##ws() const { return GSVector8(_mm256_permute_ps(m, _MM_SHUFFLE(wn, zn, yn, xn))); }
#define VECTOR8_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \
@ -831,8 +843,8 @@ public:
// _ = 0
#define VECTOR8_PERMUTE128_2(as, an, bs, bn) \
__forceinline GSVector8 as##bs() const {return GSVector8(_mm256_permute2f128_ps(m, m, an | (bn << 4)));} \
__forceinline GSVector8 as##bs(const GSVector8& v) const {return GSVector8(_mm256_permute2f128_ps(m, v.m, an | (bn << 4)));} \
__forceinline GSVector8 as##bs() const { return GSVector8(_mm256_permute2f128_ps(m, m, an | (bn << 4))); } \
__forceinline GSVector8 as##bs(const GSVector8& v) const { return GSVector8(_mm256_permute2f128_ps(m, v.m, an | (bn << 4))); } \
#define VECTOR8_PERMUTE128_1(as, an) \
VECTOR8_PERMUTE128_2(as, an, a, 0) \
@ -847,7 +859,7 @@ public:
VECTOR8_PERMUTE128_1(d, 3)
VECTOR8_PERMUTE128_1(_, 8)
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
// a = v[63:0]
// b = v[127:64]
@ -855,7 +867,7 @@ public:
// d = v[255:192]
#define VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, ds, dn) \
__forceinline GSVector8 as##bs##cs##ds() const {return GSVector8(_mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(m), _MM_SHUFFLE(dn, cn, bn, an))));} \
__forceinline GSVector8 as##bs##cs##ds() const { return GSVector8(_mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(m), _MM_SHUFFLE(dn, cn, bn, an)))); } \
#define VECTOR8_PERMUTE64_3(as, an, bs, bn, cs, cn) \
VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, a, 0) \
@ -902,7 +914,7 @@ public:
// TODO: v.(x0|y0|z0|w0|x1|y1|z1|w1) // broadcast element
#endif
#endif
};
#endif

View File

@ -72,8 +72,8 @@ class alignas(32) GSVector8i
public:
union
{
struct {int x0, y0, z0, w0, x1, y1, z1, w1;};
struct {int r0, g0, b0, a0, r1, g1, b1, a1;};
struct { int x0, y0, z0, w0, x1, y1, z1, w1; };
struct { int r0, g0, b0, a0, r1, g1, b1, a1; };
int v[8];
float f32[8];
int8 i8[32];
@ -131,15 +131,15 @@ public:
__forceinline GSVector8i(__m128i m0, __m128i m1)
{
#if 0 // _MSC_VER >= 1700
#if 0 // _MSC_VER >= 1700
this->m = _mm256_permute2x128_si256(_mm256_castsi128_si256(m0), _mm256_castsi128_si256(m1), 0);
#else
#else
*this = zero().insert<0>(m0).insert<1>(m1);
#endif
#endif
}
GSVector8i(const GSVector8i& v) = default;
@ -159,22 +159,22 @@ public:
{
}
__forceinline void operator = (const GSVector8i& v)
// Copies the 256-bit register of v into this vector.
__forceinline void operator=(const GSVector8i& v)
{
	this->m = v.m;
}
__forceinline void operator = (int i)
// Broadcasts the 32-bit integer i into this vector.
__forceinline void operator=(int i)
{
	const __m128i scalar = _mm_cvtsi32_si128(i);
	m = _mm256_broadcastd_epi32(scalar); // m = _mm256_set1_epi32(i);
}
__forceinline void operator = (__m128i m)
// Assigns the 128-bit value m to both halves of this 256-bit vector.
__forceinline void operator=(__m128i m)
{
	const __m256i low = _mm256_castsi128_si256(m);
	this->m = _mm256_inserti128_si256(low, m, 1); // index 1 writes m into the upper half as well
}
__forceinline void operator = (__m256i m)
// Stores the raw 256-bit register; the parameter shadows the member, hence the explicit this->.
__forceinline void operator=(__m256i m)
{
	const __m256i value = m;
	this->m = value;
}
@ -316,7 +316,8 @@ public:
return GSVector8i(_mm256_blendv_epi8(m, a, mask));
}
template<int mask> __forceinline GSVector8i blend16(const GSVector8i& a) const
// Blends this vector with a via _mm256_blend_epi16 using the compile-time `mask`.
template <int mask>
__forceinline GSVector8i blend16(const GSVector8i& a) const
{
	const __m256i blended = _mm256_blend_epi16(m, a, mask);
	return GSVector8i(blended);
}
@ -522,77 +523,80 @@ public:
static __forceinline GSVector8i i8to16c(const void* p)
{
return GSVector8i(_mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)p)));
return GSVector8i(_mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)p)));
}
static __forceinline GSVector8i u8to16c(const void* p)
{
return GSVector8i(_mm256_cvtepu8_epi16(_mm_load_si128((__m128i*)p)));
return GSVector8i(_mm256_cvtepu8_epi16(_mm_load_si128((__m128i*)p)));
}
static __forceinline GSVector8i i8to32c(const void* p)
{
return GSVector8i(_mm256_cvtepi8_epi32(_mm_loadl_epi64((__m128i*)p)));
return GSVector8i(_mm256_cvtepi8_epi32(_mm_loadl_epi64((__m128i*)p)));
}
static __forceinline GSVector8i u8to32c(const void* p)
{
return GSVector8i(_mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)p)));
return GSVector8i(_mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)p)));
}
static __forceinline GSVector8i i8to64c(int i)
{
return GSVector8i(_mm256_cvtepi8_epi64(_mm_cvtsi32_si128(i)));
return GSVector8i(_mm256_cvtepi8_epi64(_mm_cvtsi32_si128(i)));
}
static __forceinline GSVector8i u8to64c(int i)
{
return GSVector8i(_mm256_cvtepu8_epi64(_mm_cvtsi32_si128(i)));
return GSVector8i(_mm256_cvtepu8_epi64(_mm_cvtsi32_si128(i)));
}
static __forceinline GSVector8i i16to32c(const void* p)
{
return GSVector8i(_mm256_cvtepi16_epi32(_mm_load_si128((__m128i*)p)));
return GSVector8i(_mm256_cvtepi16_epi32(_mm_load_si128((__m128i*)p)));
}
static __forceinline GSVector8i u16to32c(const void* p)
{
return GSVector8i(_mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)p)));
return GSVector8i(_mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)p)));
}
static __forceinline GSVector8i i16to64c(const void* p)
{
return GSVector8i(_mm256_cvtepi16_epi64(_mm_loadl_epi64((__m128i*)p)));
return GSVector8i(_mm256_cvtepi16_epi64(_mm_loadl_epi64((__m128i*)p)));
}
static __forceinline GSVector8i u16to64c(const void* p)
{
return GSVector8i(_mm256_cvtepu16_epi64(_mm_loadl_epi64((__m128i*)p)));
return GSVector8i(_mm256_cvtepu16_epi64(_mm_loadl_epi64((__m128i*)p)));
}
static __forceinline GSVector8i i32to64c(const void* p)
{
return GSVector8i(_mm256_cvtepi32_epi64(_mm_load_si128((__m128i*)p)));
return GSVector8i(_mm256_cvtepi32_epi64(_mm_load_si128((__m128i*)p)));
}
static __forceinline GSVector8i u32to64c(const void* p)
{
return GSVector8i(_mm256_cvtepu32_epi64(_mm_load_si128((__m128i*)p)));
return GSVector8i(_mm256_cvtepu32_epi64(_mm_load_si128((__m128i*)p)));
}
//
template<int i> __forceinline GSVector8i srl() const
// Byte-wise right shift by i via _mm256_srli_si256.
template <int i>
__forceinline GSVector8i srl() const
{
	const __m256i shifted = _mm256_srli_si256(m, i);
	return GSVector8i(shifted);
}
template<int i> __forceinline GSVector8i srl(const GSVector8i& v)
// Byte-wise right shift by i with the vacated bytes taken from v (_mm256_alignr_epi8(v.m, m, i)).
// const-qualified like the sibling srl()/sll() overloads: the body never modifies *this, and
// without the qualifier the overload could not be called on a const GSVector8i.
template <int i>
__forceinline GSVector8i srl(const GSVector8i& v) const
{
	return GSVector8i(_mm256_alignr_epi8(v.m, m, i));
}
template<int i> __forceinline GSVector8i sll() const
template <int i>
__forceinline GSVector8i sll() const
{
return GSVector8i(_mm256_slli_si256(m, i));
//return GSVector8i(_mm256_slli_si128(m, i));
@ -848,21 +852,24 @@ public:
return GSVector8i(_mm256_madd_epi16(m, v.m));
}
template<int shift> __forceinline GSVector8i lerp16(const GSVector8i& a, const GSVector8i& f) const
// 16-bit interpolation from this vector towards a: (a - this) * f << shift + this
template <int shift>
__forceinline GSVector8i lerp16(const GSVector8i& a, const GSVector8i& f) const
{
	const GSVector8i delta = a.sub16(*this);
	const GSVector8i scaled = delta.modulate16<shift>(f);

	return add16(scaled);
}
template<int shift> __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c)
// Static three-operand form without the final add: (a - b) * c << shift
template <int shift>
__forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c)
{
	const GSVector8i delta = a.sub16(b);

	return delta.modulate16<shift>(c);
}
template<int shift> __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c, const GSVector8i& d)
template <int shift>
__forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c, const GSVector8i& d)
{
// (a - b) * c << shift + d
@ -876,11 +883,12 @@ public:
return add16(a.sub16(*this).mul16l(f).sra16(4));
}
template<int shift> __forceinline GSVector8i modulate16(const GSVector8i& f) const
template <int shift>
__forceinline GSVector8i modulate16(const GSVector8i& f) const
{
// a * f << shift
if(shift == 0)
if (shift == 0)
{
return mul16hrs(f);
}
@ -977,7 +985,8 @@ public:
// TODO: extract/insert
template<int i> __forceinline int extract8() const
template <int i>
__forceinline int extract8() const
{
ASSERT(i < 32);
@ -986,7 +995,8 @@ public:
return v.extract8<i & 15>();
}
template<int i> __forceinline int extract16() const
// Returns 16-bit element i (0..15) of this 256-bit vector.
template <int i>
__forceinline int extract16() const
{
ASSERT(i < 16);
@ -995,27 +1005,32 @@ public:
// NOTE(review): `v` is declared on a line hidden by the diff gap above; presumably it is the
// 128-bit half selected by i / 8, mirroring extract8 (i & 15) and extract32 (i & 3).
// A 128-bit half holds eight 16-bit elements, so the index inside it is i & 7. The previous
// mask, i & 8, could only yield 0 or the out-of-range value 8.
return v.extract16<i & 7>();
}
template<int i> __forceinline int extract32() const
template <int i>
__forceinline int extract32() const
{
ASSERT(i < 8);
GSVector4i v = extract<i / 4>();
if((i & 3) == 0) return GSVector4i::store(v);
if ((i & 3) == 0)
return GSVector4i::store(v);
return v.extract32<i & 3>();
}
template<int i> __forceinline GSVector4i extract() const
template <int i>
__forceinline GSVector4i extract() const
{
ASSERT(i < 2);
if(i == 0) return GSVector4i(_mm256_castsi256_si128(m));
if (i == 0)
return GSVector4i(_mm256_castsi256_si128(m));
return GSVector4i(_mm256_extracti128_si256(m, i));
}
template<int i> __forceinline GSVector8i insert(__m128i m) const
template <int i>
__forceinline GSVector8i insert(__m128i m) const
{
ASSERT(i < 2);
@ -1024,7 +1039,8 @@ public:
// TODO: gather
template<class T> __forceinline GSVector8i gather32_32(const T* ptr) const
template <class T>
__forceinline GSVector8i gather32_32(const T* ptr) const
{
GSVector4i v0;
GSVector4i v1;
@ -1060,7 +1076,8 @@ public:
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
}
template<class T1, class T2> __forceinline GSVector8i gather32_32(const T1* ptr1, const T2* ptr2) const
template <class T1, class T2>
__forceinline GSVector8i gather32_32(const T1* ptr1, const T2* ptr2) const
{
GSVector4i v0;
GSVector4i v1;
@ -1091,7 +1108,8 @@ public:
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
}
template<class T> __forceinline void gather32_32(const T* RESTRICT ptr, GSVector8i* RESTRICT dst) const
// Gathers through ptr with the single-pointer gather32_32 overload and writes the result to *dst.
template <class T>
__forceinline void gather32_32(const T* RESTRICT ptr, GSVector8i* RESTRICT dst) const
{
	*dst = gather32_32<>(ptr);
}
@ -1144,7 +1162,8 @@ public:
// return GSVector8i(l).insert<1>(h);
}
template<bool aligned> __forceinline static GSVector8i load(const void* p)
// Loads 256 bits from p; the `aligned` template flag selects _mm256_load_si256 over _mm256_loadu_si256.
template <bool aligned>
__forceinline static GSVector8i load(const void* p)
{
	const __m256i* src = static_cast<const __m256i*>(p);
	if (aligned)
		return GSVector8i(_mm256_load_si256(src));
	return GSVector8i(_mm256_loadu_si256(src));
}
@ -1154,14 +1173,14 @@ public:
return cast(GSVector4i::load(i));
}
#ifdef _M_AMD64
#ifdef _M_AMD64
// Loads the 64-bit value i through GSVector4i::loadq and converts the result with cast().
__forceinline static GSVector8i loadq(int64 i)
{
	const GSVector4i low = GSVector4i::loadq(i);
	return cast(low);
}
#endif
#endif
__forceinline static void storent(void* p, const GSVector8i& v)
{
@ -1184,10 +1203,13 @@ public:
GSVector8i::storeh(ph, v);
}
// Writes all 256 bits of v to p.
// The template flag picks the intrinsic: aligned == true uses _mm256_store_si256
// (p must then be 32-byte aligned), aligned == false uses _mm256_storeu_si256.
template<bool aligned> __forceinline static void store(void* p, const GSVector8i& v)
template <bool aligned>
__forceinline static void store(void* p, const GSVector8i& v)
{
if(aligned) _mm256_store_si256((__m256i*)p, v.m);
else _mm256_storeu_si256((__m256i*)p, v.m);
if (aligned)
_mm256_store_si256((__m256i*)p, v.m);
else
_mm256_storeu_si256((__m256i*)p, v.m);
}
__forceinline static int store(const GSVector8i& v)
@ -1195,26 +1217,27 @@ public:
return GSVector4i::store(GSVector4i::cast(v));
}
// 64-bit builds only (_M_AMD64): returns a 64-bit integer taken from v by
// narrowing it with GSVector4i::cast and delegating to GSVector4i::storeq.
// NOTE(review): presumably this yields the lowest 64 bits of v - confirm against
// GSVector4i::cast / GSVector4i::storeq, which are not visible here.
#ifdef _M_AMD64
#ifdef _M_AMD64
__forceinline static int64 storeq(const GSVector8i& v)
{
return GSVector4i::storeq(GSVector4i::cast(v));
}
#endif
#endif
__forceinline static void storent(void* RESTRICT dst, const void* RESTRICT src, size_t size)
{
const GSVector8i* s = (const GSVector8i*)src;
GSVector8i* d = (GSVector8i*)dst;
if(size == 0) return;
if (size == 0)
return;
size_t i = 0;
size_t j = size >> 7;
for(; i < j; i++, s += 4, d += 4)
for (; i < j; i++, s += 4, d += 4)
{
storent(&d[0], s[0]);
storent(&d[1], s[1]);
@ -1224,7 +1247,8 @@ public:
size &= 127;
if(size == 0) return;
if (size == 0)
return;
memcpy(d, s, size);
}
@ -1348,142 +1372,142 @@ public:
d = f.bd(d);
}
// Adds v to this vector lane by lane, treating both as eight 32-bit integers.
__forceinline void operator += (const GSVector8i& v)
__forceinline void operator+=(const GSVector8i& v)
{
m = _mm256_add_epi32(m, v);
}
// Subtracts v from this vector lane by lane, treating both as eight 32-bit integers.
__forceinline void operator -= (const GSVector8i& v)
__forceinline void operator-=(const GSVector8i& v)
{
m = _mm256_sub_epi32(m, v);
}
// Scalar form of +=: wraps i in a GSVector8i and forwards to the vector overload.
// NOTE(review): GSVector8i(int) presumably broadcasts i to every lane - constructor not visible here.
__forceinline void operator += (int i)
__forceinline void operator+=(int i)
{
*this += GSVector8i(i);
}
// Scalar form of -=: wraps i in a GSVector8i and forwards to the vector overload.
// NOTE(review): GSVector8i(int) presumably broadcasts i to every lane - constructor not visible here.
__forceinline void operator -= (int i)
__forceinline void operator-=(int i)
{
*this -= GSVector8i(i);
}
// Shifts every 32-bit lane left by i bits in place; zeros are shifted in.
__forceinline void operator <<= (const int i)
__forceinline void operator<<=(const int i)
{
m = _mm256_slli_epi32(m, i);
}
// Shifts every 32-bit lane right by i bits in place.
// This is the logical shift (_mm256_srli_epi32): zeros are shifted in, the sign
// bit is not replicated.
__forceinline void operator >>= (const int i)
__forceinline void operator>>=(const int i)
{
m = _mm256_srli_epi32(m, i);
}
// Bitwise AND of all 256 bits with v, stored back into this vector.
__forceinline void operator &= (const GSVector8i& v)
__forceinline void operator&=(const GSVector8i& v)
{
m = _mm256_and_si256(m, v);
}
// Bitwise OR of all 256 bits with v, stored back into this vector.
__forceinline void operator |= (const GSVector8i& v)
__forceinline void operator|=(const GSVector8i& v)
{
m = _mm256_or_si256(m, v);
}
// Bitwise XOR of all 256 bits with v, stored back into this vector.
__forceinline void operator ^= (const GSVector8i& v)
__forceinline void operator^=(const GSVector8i& v)
{
m = _mm256_xor_si256(m, v);
}
// Returns the lane-wise sum of v1 and v2 (eight 32-bit integer additions).
__forceinline friend GSVector8i operator + (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator+(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_add_epi32(v1, v2));
}
// Returns the lane-wise difference v1 - v2 (eight 32-bit integer subtractions).
__forceinline friend GSVector8i operator - (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator-(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_sub_epi32(v1, v2));
}
// Vector + scalar: wraps i in a GSVector8i and forwards to the vector/vector overload.
__forceinline friend GSVector8i operator + (const GSVector8i& v, int i)
__forceinline friend GSVector8i operator+(const GSVector8i& v, int i)
{
return v + GSVector8i(i);
}
// Vector - scalar: wraps i in a GSVector8i and forwards to the vector/vector overload.
__forceinline friend GSVector8i operator - (const GSVector8i& v, int i)
__forceinline friend GSVector8i operator-(const GSVector8i& v, int i)
{
return v - GSVector8i(i);
}
// Returns v with every 32-bit lane shifted left by i bits; zeros are shifted in.
__forceinline friend GSVector8i operator << (const GSVector8i& v, const int i)
__forceinline friend GSVector8i operator<<(const GSVector8i& v, const int i)
{
return GSVector8i(_mm256_slli_epi32(v, i));
}
// Returns v with every 32-bit lane shifted right by i bits.
// Logical shift (_mm256_srli_epi32): zeros are shifted in, so negative lanes do
// not stay negative.
__forceinline friend GSVector8i operator >> (const GSVector8i& v, const int i)
__forceinline friend GSVector8i operator>>(const GSVector8i& v, const int i)
{
return GSVector8i(_mm256_srli_epi32(v, i));
}
// Returns the bitwise AND of all 256 bits of v1 and v2.
__forceinline friend GSVector8i operator & (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator&(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_and_si256(v1, v2));
}
// Returns the bitwise OR of all 256 bits of v1 and v2.
__forceinline friend GSVector8i operator | (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator|(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_or_si256(v1, v2));
}
// Returns the bitwise XOR of all 256 bits of v1 and v2.
__forceinline friend GSVector8i operator ^ (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator^(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_xor_si256(v1, v2));
}
// Vector & scalar: wraps i in a GSVector8i and forwards to the vector/vector overload.
__forceinline friend GSVector8i operator & (const GSVector8i& v, int i)
__forceinline friend GSVector8i operator&(const GSVector8i& v, int i)
{
return v & GSVector8i(i);
}
// Vector | scalar: wraps i in a GSVector8i and forwards to the vector/vector overload.
__forceinline friend GSVector8i operator | (const GSVector8i& v, int i)
__forceinline friend GSVector8i operator|(const GSVector8i& v, int i)
{
return v | GSVector8i(i);
}
// Vector ^ scalar: wraps i in a GSVector8i and forwards to the vector/vector overload.
__forceinline friend GSVector8i operator ^ (const GSVector8i& v, int i)
__forceinline friend GSVector8i operator^(const GSVector8i& v, int i)
{
return v ^ GSVector8i(i);
}
// Bitwise complement of v.
// v == v compares every lane with itself and so yields an all-ones mask;
// XOR-ing v with that mask flips every bit.
__forceinline friend GSVector8i operator ~ (const GSVector8i& v)
__forceinline friend GSVector8i operator~(const GSVector8i& v)
{
return v ^ (v == v);
}
// Lane-wise 32-bit equality test. The result is a mask vector, not a bool:
// _mm256_cmpeq_epi32 sets a lane to all-ones where v1 and v2 match and to zero elsewhere.
__forceinline friend GSVector8i operator == (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator==(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_cmpeq_epi32(v1, v2));
}
// Lane-wise inequality mask: the bitwise complement of the == mask.
__forceinline friend GSVector8i operator != (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator!=(const GSVector8i& v1, const GSVector8i& v2)
{
return ~(v1 == v2);
}
// Lane-wise greater-than mask. _mm256_cmpgt_epi32 compares the lanes as signed
// 32-bit integers and sets a lane to all-ones where v1 > v2, zero elsewhere.
__forceinline friend GSVector8i operator > (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator>(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_cmpgt_epi32(v1, v2));
}
// Lane-wise less-than mask, implemented as the signed greater-than compare with
// the operands swapped (v2 > v1).
__forceinline friend GSVector8i operator < (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator<(const GSVector8i& v1, const GSVector8i& v2)
{
return GSVector8i(_mm256_cmpgt_epi32(v2, v1));
}
// Lane-wise greater-or-equal mask, formed by OR-ing the > mask with the == mask.
__forceinline friend GSVector8i operator >= (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator>=(const GSVector8i& v1, const GSVector8i& v2)
{
return (v1 > v2) | (v1 == v2);
}
// Lane-wise less-or-equal mask, formed by OR-ing the < mask with the == mask.
__forceinline friend GSVector8i operator <= (const GSVector8i& v1, const GSVector8i& v2)
__forceinline friend GSVector8i operator<=(const GSVector8i& v1, const GSVector8i& v2)
{
return (v1 < v2) | (v1 == v2);
}
@ -1494,10 +1518,10 @@ public:
// w = v[127:96] / v[255:224]
// Generates four const shuffle accessors whose name is the four lane letters
// xs, ys, zs, ws pasted together, optionally followed by a suffix:
//   (none) - _mm256_shuffle_epi32 on m
//   l      - _mm256_shufflelo_epi16 on m
//   h      - _mm256_shufflehi_epi16 on m
//   lh     - _mm256_shufflelo_epi16 followed by _mm256_shufflehi_epi16
// xn, yn, zn, wn are the source indices; all four accessors use the same
// immediate, _MM_SHUFFLE(wn, zn, yn, xn).
#define VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
__forceinline GSVector8i xs##ys##zs##ws() const {return GSVector8i(_mm256_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
__forceinline GSVector8i xs##ys##zs##ws##l() const {return GSVector8i(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
__forceinline GSVector8i xs##ys##zs##ws##h() const {return GSVector8i(_mm256_shufflehi_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
__forceinline GSVector8i xs##ys##zs##ws##lh() const {return GSVector8i(_mm256_shufflehi_epi16(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)), _MM_SHUFFLE(wn, zn, yn, xn)));} \
__forceinline GSVector8i xs##ys##zs##ws() const { return GSVector8i(_mm256_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn))); } \
__forceinline GSVector8i xs##ys##zs##ws##l() const { return GSVector8i(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn))); } \
__forceinline GSVector8i xs##ys##zs##ws##h() const { return GSVector8i(_mm256_shufflehi_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn))); } \
__forceinline GSVector8i xs##ys##zs##ws##lh() const { return GSVector8i(_mm256_shufflehi_epi16(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)), _MM_SHUFFLE(wn, zn, yn, xn))); } \
#define VECTOR8i_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \
@ -1529,8 +1553,8 @@ public:
// _ = 0
// Generates two const methods named as##bs that permute 128-bit halves with
// _mm256_permute2x128_si256. The control byte is an | (bn << 4): an selects the
// source of the low half of the result, bn the source of the high half.
//   as##bs()   - both operands are this vector (m, m)
//   as##bs(v)  - first operand is this vector, second operand is v
#define VECTOR8i_PERMUTE128_2(as, an, bs, bn) \
__forceinline GSVector8i as##bs() const {return GSVector8i(_mm256_permute2x128_si256(m, m, an | (bn << 4)));} \
__forceinline GSVector8i as##bs(const GSVector8i& v) const {return GSVector8i(_mm256_permute2x128_si256(m, v.m, an | (bn << 4)));} \
__forceinline GSVector8i as##bs() const { return GSVector8i(_mm256_permute2x128_si256(m, m, an | (bn << 4))); } \
__forceinline GSVector8i as##bs(const GSVector8i& v) const { return GSVector8i(_mm256_permute2x128_si256(m, v.m, an | (bn << 4))); } \
#define VECTOR8i_PERMUTE128_1(as, an) \
VECTOR8i_PERMUTE128_2(as, an, a, 0) \
@ -1551,7 +1575,7 @@ public:
// d = v[255:192]
// Generates a const method named as##bs##cs##ds that rearranges the four 64-bit
// elements of m with _mm256_permute4x64_epi64. an, bn, cn, dn are the source
// element indices, packed as _MM_SHUFFLE(dn, cn, bn, an).
#define VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, ds, dn) \
__forceinline GSVector8i as##bs##cs##ds() const {return GSVector8i(_mm256_permute4x64_epi64(m, _MM_SHUFFLE(dn, cn, bn, an)));} \
__forceinline GSVector8i as##bs##cs##ds() const { return GSVector8i(_mm256_permute4x64_epi64(m, _MM_SHUFFLE(dn, cn, bn, an))); } \
#define VECTOR8i_PERMUTE64_3(as, an, bs, bn, cs, cn) \
VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, a, 0) \
@ -1626,9 +1650,9 @@ public:
// this one only has m128 source op, it will be saved to a temp on stack if the compiler is not smart enough and use the address of v directly (<= vs2012u3rc2)
return GSVector8i(_mm256_broadcastsi128_si256(v)); // fastest
//return GSVector8i(v); // almost as fast as broadcast
//return cast(v).insert<1>(v); // slow
//return cast(v).aa(); // slowest
// return GSVector8i(v); // almost as fast as broadcast
// return cast(v).insert<1>(v); // slow
// return cast(v).aa(); // slowest
}
__forceinline static GSVector8i broadcast8(const void* p)
@ -1656,206 +1680,206 @@ public:
return GSVector8i(_mm256_broadcastsi128_si256(*(const __m128i*)p));
}
// Returns a vector with all 256 bits cleared.
__forceinline static GSVector8i zero() {return GSVector8i(_mm256_setzero_si256());}
__forceinline static GSVector8i zero() { return GSVector8i(_mm256_setzero_si256()); }
// Returns a vector with every bit set: comparing zero() with itself makes the
// lane-wise == mask all-ones in every lane.
__forceinline static GSVector8i xffffffff() {return zero() == zero();}
__forceinline static GSVector8i xffffffff() { return zero() == zero(); }
// Low-bit mask generators for 32-bit lanes. Each function starts from the all-ones
// vector and applies srl32(n); the function name spells the 32-bit value
// 0xffffffff >> n.
// NOTE(review): assumes srl32(n) is a logical right shift of each 32-bit lane
// (defined outside this view) - the names only match under that reading.
__forceinline static GSVector8i x00000001() {return xffffffff().srl32(31);}
__forceinline static GSVector8i x00000003() {return xffffffff().srl32(30);}
__forceinline static GSVector8i x00000007() {return xffffffff().srl32(29);}
__forceinline static GSVector8i x0000000f() {return xffffffff().srl32(28);}
__forceinline static GSVector8i x0000001f() {return xffffffff().srl32(27);}
__forceinline static GSVector8i x0000003f() {return xffffffff().srl32(26);}
__forceinline static GSVector8i x0000007f() {return xffffffff().srl32(25);}
__forceinline static GSVector8i x000000ff() {return xffffffff().srl32(24);}
__forceinline static GSVector8i x000001ff() {return xffffffff().srl32(23);}
__forceinline static GSVector8i x000003ff() {return xffffffff().srl32(22);}
__forceinline static GSVector8i x000007ff() {return xffffffff().srl32(21);}
__forceinline static GSVector8i x00000fff() {return xffffffff().srl32(20);}
__forceinline static GSVector8i x00001fff() {return xffffffff().srl32(19);}
__forceinline static GSVector8i x00003fff() {return xffffffff().srl32(18);}
__forceinline static GSVector8i x00007fff() {return xffffffff().srl32(17);}
__forceinline static GSVector8i x0000ffff() {return xffffffff().srl32(16);}
__forceinline static GSVector8i x0001ffff() {return xffffffff().srl32(15);}
__forceinline static GSVector8i x0003ffff() {return xffffffff().srl32(14);}
__forceinline static GSVector8i x0007ffff() {return xffffffff().srl32(13);}
__forceinline static GSVector8i x000fffff() {return xffffffff().srl32(12);}
__forceinline static GSVector8i x001fffff() {return xffffffff().srl32(11);}
__forceinline static GSVector8i x003fffff() {return xffffffff().srl32(10);}
__forceinline static GSVector8i x007fffff() {return xffffffff().srl32( 9);}
__forceinline static GSVector8i x00ffffff() {return xffffffff().srl32( 8);}
__forceinline static GSVector8i x01ffffff() {return xffffffff().srl32( 7);}
__forceinline static GSVector8i x03ffffff() {return xffffffff().srl32( 6);}
__forceinline static GSVector8i x07ffffff() {return xffffffff().srl32( 5);}
__forceinline static GSVector8i x0fffffff() {return xffffffff().srl32( 4);}
__forceinline static GSVector8i x1fffffff() {return xffffffff().srl32( 3);}
__forceinline static GSVector8i x3fffffff() {return xffffffff().srl32( 2);}
__forceinline static GSVector8i x7fffffff() {return xffffffff().srl32( 1);}
__forceinline static GSVector8i x00000001() { return xffffffff().srl32(31); }
__forceinline static GSVector8i x00000003() { return xffffffff().srl32(30); }
__forceinline static GSVector8i x00000007() { return xffffffff().srl32(29); }
__forceinline static GSVector8i x0000000f() { return xffffffff().srl32(28); }
__forceinline static GSVector8i x0000001f() { return xffffffff().srl32(27); }
__forceinline static GSVector8i x0000003f() { return xffffffff().srl32(26); }
__forceinline static GSVector8i x0000007f() { return xffffffff().srl32(25); }
__forceinline static GSVector8i x000000ff() { return xffffffff().srl32(24); }
__forceinline static GSVector8i x000001ff() { return xffffffff().srl32(23); }
__forceinline static GSVector8i x000003ff() { return xffffffff().srl32(22); }
__forceinline static GSVector8i x000007ff() { return xffffffff().srl32(21); }
__forceinline static GSVector8i x00000fff() { return xffffffff().srl32(20); }
__forceinline static GSVector8i x00001fff() { return xffffffff().srl32(19); }
__forceinline static GSVector8i x00003fff() { return xffffffff().srl32(18); }
__forceinline static GSVector8i x00007fff() { return xffffffff().srl32(17); }
__forceinline static GSVector8i x0000ffff() { return xffffffff().srl32(16); }
__forceinline static GSVector8i x0001ffff() { return xffffffff().srl32(15); }
__forceinline static GSVector8i x0003ffff() { return xffffffff().srl32(14); }
__forceinline static GSVector8i x0007ffff() { return xffffffff().srl32(13); }
__forceinline static GSVector8i x000fffff() { return xffffffff().srl32(12); }
__forceinline static GSVector8i x001fffff() { return xffffffff().srl32(11); }
__forceinline static GSVector8i x003fffff() { return xffffffff().srl32(10); }
__forceinline static GSVector8i x007fffff() { return xffffffff().srl32( 9); }
__forceinline static GSVector8i x00ffffff() { return xffffffff().srl32( 8); }
__forceinline static GSVector8i x01ffffff() { return xffffffff().srl32( 7); }
__forceinline static GSVector8i x03ffffff() { return xffffffff().srl32( 6); }
__forceinline static GSVector8i x07ffffff() { return xffffffff().srl32( 5); }
__forceinline static GSVector8i x0fffffff() { return xffffffff().srl32( 4); }
__forceinline static GSVector8i x1fffffff() { return xffffffff().srl32( 3); }
__forceinline static GSVector8i x3fffffff() { return xffffffff().srl32( 2); }
__forceinline static GSVector8i x7fffffff() { return xffffffff().srl32( 1); }
// High-bit mask generators for 32-bit lanes. Each function starts from the all-ones
// vector and applies sll32(n); the function name spells the 32-bit value
// 0xffffffff << n.
// NOTE(review): assumes sll32(n) is a left shift of each 32-bit lane (defined
// outside this view) - the names only match under that reading.
__forceinline static GSVector8i x80000000() {return xffffffff().sll32(31);}
__forceinline static GSVector8i xc0000000() {return xffffffff().sll32(30);}
__forceinline static GSVector8i xe0000000() {return xffffffff().sll32(29);}
__forceinline static GSVector8i xf0000000() {return xffffffff().sll32(28);}
__forceinline static GSVector8i xf8000000() {return xffffffff().sll32(27);}
__forceinline static GSVector8i xfc000000() {return xffffffff().sll32(26);}
__forceinline static GSVector8i xfe000000() {return xffffffff().sll32(25);}
__forceinline static GSVector8i xff000000() {return xffffffff().sll32(24);}
__forceinline static GSVector8i xff800000() {return xffffffff().sll32(23);}
__forceinline static GSVector8i xffc00000() {return xffffffff().sll32(22);}
__forceinline static GSVector8i xffe00000() {return xffffffff().sll32(21);}
__forceinline static GSVector8i xfff00000() {return xffffffff().sll32(20);}
__forceinline static GSVector8i xfff80000() {return xffffffff().sll32(19);}
__forceinline static GSVector8i xfffc0000() {return xffffffff().sll32(18);}
__forceinline static GSVector8i xfffe0000() {return xffffffff().sll32(17);}
__forceinline static GSVector8i xffff0000() {return xffffffff().sll32(16);}
__forceinline static GSVector8i xffff8000() {return xffffffff().sll32(15);}
__forceinline static GSVector8i xffffc000() {return xffffffff().sll32(14);}
__forceinline static GSVector8i xffffe000() {return xffffffff().sll32(13);}
__forceinline static GSVector8i xfffff000() {return xffffffff().sll32(12);}
__forceinline static GSVector8i xfffff800() {return xffffffff().sll32(11);}
__forceinline static GSVector8i xfffffc00() {return xffffffff().sll32(10);}
__forceinline static GSVector8i xfffffe00() {return xffffffff().sll32( 9);}
__forceinline static GSVector8i xffffff00() {return xffffffff().sll32( 8);}
__forceinline static GSVector8i xffffff80() {return xffffffff().sll32( 7);}
__forceinline static GSVector8i xffffffc0() {return xffffffff().sll32( 6);}
__forceinline static GSVector8i xffffffe0() {return xffffffff().sll32( 5);}
__forceinline static GSVector8i xfffffff0() {return xffffffff().sll32( 4);}
__forceinline static GSVector8i xfffffff8() {return xffffffff().sll32( 3);}
__forceinline static GSVector8i xfffffffc() {return xffffffff().sll32( 2);}
__forceinline static GSVector8i xfffffffe() {return xffffffff().sll32( 1);}
__forceinline static GSVector8i x80000000() { return xffffffff().sll32(31); }
__forceinline static GSVector8i xc0000000() { return xffffffff().sll32(30); }
__forceinline static GSVector8i xe0000000() { return xffffffff().sll32(29); }
__forceinline static GSVector8i xf0000000() { return xffffffff().sll32(28); }
__forceinline static GSVector8i xf8000000() { return xffffffff().sll32(27); }
__forceinline static GSVector8i xfc000000() { return xffffffff().sll32(26); }
__forceinline static GSVector8i xfe000000() { return xffffffff().sll32(25); }
__forceinline static GSVector8i xff000000() { return xffffffff().sll32(24); }
__forceinline static GSVector8i xff800000() { return xffffffff().sll32(23); }
__forceinline static GSVector8i xffc00000() { return xffffffff().sll32(22); }
__forceinline static GSVector8i xffe00000() { return xffffffff().sll32(21); }
__forceinline static GSVector8i xfff00000() { return xffffffff().sll32(20); }
__forceinline static GSVector8i xfff80000() { return xffffffff().sll32(19); }
__forceinline static GSVector8i xfffc0000() { return xffffffff().sll32(18); }
__forceinline static GSVector8i xfffe0000() { return xffffffff().sll32(17); }
__forceinline static GSVector8i xffff0000() { return xffffffff().sll32(16); }
__forceinline static GSVector8i xffff8000() { return xffffffff().sll32(15); }
__forceinline static GSVector8i xffffc000() { return xffffffff().sll32(14); }
__forceinline static GSVector8i xffffe000() { return xffffffff().sll32(13); }
__forceinline static GSVector8i xfffff000() { return xffffffff().sll32(12); }
__forceinline static GSVector8i xfffff800() { return xffffffff().sll32(11); }
__forceinline static GSVector8i xfffffc00() { return xffffffff().sll32(10); }
__forceinline static GSVector8i xfffffe00() { return xffffffff().sll32( 9); }
__forceinline static GSVector8i xffffff00() { return xffffffff().sll32( 8); }
__forceinline static GSVector8i xffffff80() { return xffffffff().sll32( 7); }
__forceinline static GSVector8i xffffffc0() { return xffffffff().sll32( 6); }
__forceinline static GSVector8i xffffffe0() { return xffffffff().sll32( 5); }
__forceinline static GSVector8i xfffffff0() { return xffffffff().sll32( 4); }
__forceinline static GSVector8i xfffffff8() { return xffffffff().sll32( 3); }
__forceinline static GSVector8i xfffffffc() { return xffffffff().sll32( 2); }
__forceinline static GSVector8i xfffffffe() { return xffffffff().sll32( 1); }
// Low-bit mask generators for 16-bit lanes. Each function starts from the all-ones
// vector and applies srl16(n); the function name spells the 16-bit value
// 0xffff >> n.
// NOTE(review): assumes srl16(n) is a logical right shift of each 16-bit lane
// (defined outside this view) - the names only match under that reading.
__forceinline static GSVector8i x0001() {return xffffffff().srl16(15);}
__forceinline static GSVector8i x0003() {return xffffffff().srl16(14);}
__forceinline static GSVector8i x0007() {return xffffffff().srl16(13);}
__forceinline static GSVector8i x000f() {return xffffffff().srl16(12);}
__forceinline static GSVector8i x001f() {return xffffffff().srl16(11);}
__forceinline static GSVector8i x003f() {return xffffffff().srl16(10);}
__forceinline static GSVector8i x007f() {return xffffffff().srl16( 9);}
__forceinline static GSVector8i x00ff() {return xffffffff().srl16( 8);}
__forceinline static GSVector8i x01ff() {return xffffffff().srl16( 7);}
__forceinline static GSVector8i x03ff() {return xffffffff().srl16( 6);}
__forceinline static GSVector8i x07ff() {return xffffffff().srl16( 5);}
__forceinline static GSVector8i x0fff() {return xffffffff().srl16( 4);}
__forceinline static GSVector8i x1fff() {return xffffffff().srl16( 3);}
__forceinline static GSVector8i x3fff() {return xffffffff().srl16( 2);}
__forceinline static GSVector8i x7fff() {return xffffffff().srl16( 1);}
__forceinline static GSVector8i x0001() { return xffffffff().srl16(15); }
__forceinline static GSVector8i x0003() { return xffffffff().srl16(14); }
__forceinline static GSVector8i x0007() { return xffffffff().srl16(13); }
__forceinline static GSVector8i x000f() { return xffffffff().srl16(12); }
__forceinline static GSVector8i x001f() { return xffffffff().srl16(11); }
__forceinline static GSVector8i x003f() { return xffffffff().srl16(10); }
__forceinline static GSVector8i x007f() { return xffffffff().srl16( 9); }
__forceinline static GSVector8i x00ff() { return xffffffff().srl16( 8); }
__forceinline static GSVector8i x01ff() { return xffffffff().srl16( 7); }
__forceinline static GSVector8i x03ff() { return xffffffff().srl16( 6); }
__forceinline static GSVector8i x07ff() { return xffffffff().srl16( 5); }
__forceinline static GSVector8i x0fff() { return xffffffff().srl16( 4); }
__forceinline static GSVector8i x1fff() { return xffffffff().srl16( 3); }
__forceinline static GSVector8i x3fff() { return xffffffff().srl16( 2); }
__forceinline static GSVector8i x7fff() { return xffffffff().srl16( 1); }
// High-bit mask generators for 16-bit lanes. Each function starts from the all-ones
// vector and applies sll16(n); the function name spells the 16-bit value
// 0xffff << n.
// NOTE(review): assumes sll16(n) is a left shift of each 16-bit lane (defined
// outside this view) - the names only match under that reading.
__forceinline static GSVector8i x8000() {return xffffffff().sll16(15);}
__forceinline static GSVector8i xc000() {return xffffffff().sll16(14);}
__forceinline static GSVector8i xe000() {return xffffffff().sll16(13);}
__forceinline static GSVector8i xf000() {return xffffffff().sll16(12);}
__forceinline static GSVector8i xf800() {return xffffffff().sll16(11);}
__forceinline static GSVector8i xfc00() {return xffffffff().sll16(10);}
__forceinline static GSVector8i xfe00() {return xffffffff().sll16( 9);}
__forceinline static GSVector8i xff00() {return xffffffff().sll16( 8);}
__forceinline static GSVector8i xff80() {return xffffffff().sll16( 7);}
__forceinline static GSVector8i xffc0() {return xffffffff().sll16( 6);}
__forceinline static GSVector8i xffe0() {return xffffffff().sll16( 5);}
__forceinline static GSVector8i xfff0() {return xffffffff().sll16( 4);}
__forceinline static GSVector8i xfff8() {return xffffffff().sll16( 3);}
__forceinline static GSVector8i xfffc() {return xffffffff().sll16( 2);}
__forceinline static GSVector8i xfffe() {return xffffffff().sll16( 1);}
__forceinline static GSVector8i x8000() { return xffffffff().sll16(15); }
__forceinline static GSVector8i xc000() { return xffffffff().sll16(14); }
__forceinline static GSVector8i xe000() { return xffffffff().sll16(13); }
__forceinline static GSVector8i xf000() { return xffffffff().sll16(12); }
__forceinline static GSVector8i xf800() { return xffffffff().sll16(11); }
__forceinline static GSVector8i xfc00() { return xffffffff().sll16(10); }
__forceinline static GSVector8i xfe00() { return xffffffff().sll16( 9); }
__forceinline static GSVector8i xff00() { return xffffffff().sll16( 8); }
__forceinline static GSVector8i xff80() { return xffffffff().sll16( 7); }
__forceinline static GSVector8i xffc0() { return xffffffff().sll16( 6); }
__forceinline static GSVector8i xffe0() { return xffffffff().sll16( 5); }
__forceinline static GSVector8i xfff0() { return xffffffff().sll16( 4); }
__forceinline static GSVector8i xfff8() { return xffffffff().sll16( 3); }
__forceinline static GSVector8i xfffc() { return xffffffff().sll16( 2); }
__forceinline static GSVector8i xfffe() { return xffffffff().sll16( 1); }
// Returns an all-ones vector derived from v: every lane of v equals itself, so the
// lane-wise == mask is all-ones regardless of what v contains.
// NOTE(review): presumably exists so callers can reuse a vector they already hold
// instead of materialising zero() - confirm intent with the call sites.
__forceinline static GSVector8i xffffffff(const GSVector8i& v) {return v == v;}
__forceinline static GSVector8i xffffffff(const GSVector8i& v) { return v == v; }
// Low-bit mask generators for 32-bit lanes, taking a source vector. Each one builds
// the all-ones vector from v (via xffffffff(v), i.e. v == v) and applies srl32(n);
// the function name spells the 32-bit value 0xffffffff >> n. The contents of v do
// not influence the result.
// NOTE(review): assumes srl32(n) is a logical right shift of each 32-bit lane
// (defined outside this view) - the names only match under that reading.
__forceinline static GSVector8i x00000001(const GSVector8i& v) {return xffffffff(v).srl32(31);}
__forceinline static GSVector8i x00000003(const GSVector8i& v) {return xffffffff(v).srl32(30);}
__forceinline static GSVector8i x00000007(const GSVector8i& v) {return xffffffff(v).srl32(29);}
__forceinline static GSVector8i x0000000f(const GSVector8i& v) {return xffffffff(v).srl32(28);}
__forceinline static GSVector8i x0000001f(const GSVector8i& v) {return xffffffff(v).srl32(27);}
__forceinline static GSVector8i x0000003f(const GSVector8i& v) {return xffffffff(v).srl32(26);}
__forceinline static GSVector8i x0000007f(const GSVector8i& v) {return xffffffff(v).srl32(25);}
__forceinline static GSVector8i x000000ff(const GSVector8i& v) {return xffffffff(v).srl32(24);}
__forceinline static GSVector8i x000001ff(const GSVector8i& v) {return xffffffff(v).srl32(23);}
__forceinline static GSVector8i x000003ff(const GSVector8i& v) {return xffffffff(v).srl32(22);}
__forceinline static GSVector8i x000007ff(const GSVector8i& v) {return xffffffff(v).srl32(21);}
__forceinline static GSVector8i x00000fff(const GSVector8i& v) {return xffffffff(v).srl32(20);}
__forceinline static GSVector8i x00001fff(const GSVector8i& v) {return xffffffff(v).srl32(19);}
__forceinline static GSVector8i x00003fff(const GSVector8i& v) {return xffffffff(v).srl32(18);}
__forceinline static GSVector8i x00007fff(const GSVector8i& v) {return xffffffff(v).srl32(17);}
__forceinline static GSVector8i x0000ffff(const GSVector8i& v) {return xffffffff(v).srl32(16);}
__forceinline static GSVector8i x0001ffff(const GSVector8i& v) {return xffffffff(v).srl32(15);}
__forceinline static GSVector8i x0003ffff(const GSVector8i& v) {return xffffffff(v).srl32(14);}
__forceinline static GSVector8i x0007ffff(const GSVector8i& v) {return xffffffff(v).srl32(13);}
__forceinline static GSVector8i x000fffff(const GSVector8i& v) {return xffffffff(v).srl32(12);}
__forceinline static GSVector8i x001fffff(const GSVector8i& v) {return xffffffff(v).srl32(11);}
__forceinline static GSVector8i x003fffff(const GSVector8i& v) {return xffffffff(v).srl32(10);}
__forceinline static GSVector8i x007fffff(const GSVector8i& v) {return xffffffff(v).srl32( 9);}
__forceinline static GSVector8i x00ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 8);}
__forceinline static GSVector8i x01ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 7);}
__forceinline static GSVector8i x03ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 6);}
__forceinline static GSVector8i x07ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 5);}
__forceinline static GSVector8i x0fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 4);}
__forceinline static GSVector8i x1fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 3);}
__forceinline static GSVector8i x3fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 2);}
__forceinline static GSVector8i x7fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 1);}
__forceinline static GSVector8i x00000001(const GSVector8i& v) { return xffffffff(v).srl32(31); }
__forceinline static GSVector8i x00000003(const GSVector8i& v) { return xffffffff(v).srl32(30); }
__forceinline static GSVector8i x00000007(const GSVector8i& v) { return xffffffff(v).srl32(29); }
__forceinline static GSVector8i x0000000f(const GSVector8i& v) { return xffffffff(v).srl32(28); }
__forceinline static GSVector8i x0000001f(const GSVector8i& v) { return xffffffff(v).srl32(27); }
__forceinline static GSVector8i x0000003f(const GSVector8i& v) { return xffffffff(v).srl32(26); }
__forceinline static GSVector8i x0000007f(const GSVector8i& v) { return xffffffff(v).srl32(25); }
__forceinline static GSVector8i x000000ff(const GSVector8i& v) { return xffffffff(v).srl32(24); }
__forceinline static GSVector8i x000001ff(const GSVector8i& v) { return xffffffff(v).srl32(23); }
__forceinline static GSVector8i x000003ff(const GSVector8i& v) { return xffffffff(v).srl32(22); }
__forceinline static GSVector8i x000007ff(const GSVector8i& v) { return xffffffff(v).srl32(21); }
__forceinline static GSVector8i x00000fff(const GSVector8i& v) { return xffffffff(v).srl32(20); }
__forceinline static GSVector8i x00001fff(const GSVector8i& v) { return xffffffff(v).srl32(19); }
__forceinline static GSVector8i x00003fff(const GSVector8i& v) { return xffffffff(v).srl32(18); }
__forceinline static GSVector8i x00007fff(const GSVector8i& v) { return xffffffff(v).srl32(17); }
__forceinline static GSVector8i x0000ffff(const GSVector8i& v) { return xffffffff(v).srl32(16); }
__forceinline static GSVector8i x0001ffff(const GSVector8i& v) { return xffffffff(v).srl32(15); }
__forceinline static GSVector8i x0003ffff(const GSVector8i& v) { return xffffffff(v).srl32(14); }
__forceinline static GSVector8i x0007ffff(const GSVector8i& v) { return xffffffff(v).srl32(13); }
__forceinline static GSVector8i x000fffff(const GSVector8i& v) { return xffffffff(v).srl32(12); }
__forceinline static GSVector8i x001fffff(const GSVector8i& v) { return xffffffff(v).srl32(11); }
__forceinline static GSVector8i x003fffff(const GSVector8i& v) { return xffffffff(v).srl32(10); }
__forceinline static GSVector8i x007fffff(const GSVector8i& v) { return xffffffff(v).srl32( 9); }
__forceinline static GSVector8i x00ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 8); }
__forceinline static GSVector8i x01ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 7); }
__forceinline static GSVector8i x03ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 6); }
__forceinline static GSVector8i x07ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 5); }
__forceinline static GSVector8i x0fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 4); }
__forceinline static GSVector8i x1fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 3); }
__forceinline static GSVector8i x3fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 2); }
__forceinline static GSVector8i x7fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 1); }
__forceinline static GSVector8i x80000000(const GSVector8i& v) {return xffffffff(v).sll32(31);}
__forceinline static GSVector8i xc0000000(const GSVector8i& v) {return xffffffff(v).sll32(30);}
__forceinline static GSVector8i xe0000000(const GSVector8i& v) {return xffffffff(v).sll32(29);}
__forceinline static GSVector8i xf0000000(const GSVector8i& v) {return xffffffff(v).sll32(28);}
__forceinline static GSVector8i xf8000000(const GSVector8i& v) {return xffffffff(v).sll32(27);}
__forceinline static GSVector8i xfc000000(const GSVector8i& v) {return xffffffff(v).sll32(26);}
__forceinline static GSVector8i xfe000000(const GSVector8i& v) {return xffffffff(v).sll32(25);}
__forceinline static GSVector8i xff000000(const GSVector8i& v) {return xffffffff(v).sll32(24);}
__forceinline static GSVector8i xff800000(const GSVector8i& v) {return xffffffff(v).sll32(23);}
__forceinline static GSVector8i xffc00000(const GSVector8i& v) {return xffffffff(v).sll32(22);}
__forceinline static GSVector8i xffe00000(const GSVector8i& v) {return xffffffff(v).sll32(21);}
__forceinline static GSVector8i xfff00000(const GSVector8i& v) {return xffffffff(v).sll32(20);}
__forceinline static GSVector8i xfff80000(const GSVector8i& v) {return xffffffff(v).sll32(19);}
__forceinline static GSVector8i xfffc0000(const GSVector8i& v) {return xffffffff(v).sll32(18);}
__forceinline static GSVector8i xfffe0000(const GSVector8i& v) {return xffffffff(v).sll32(17);}
__forceinline static GSVector8i xffff0000(const GSVector8i& v) {return xffffffff(v).sll32(16);}
__forceinline static GSVector8i xffff8000(const GSVector8i& v) {return xffffffff(v).sll32(15);}
__forceinline static GSVector8i xffffc000(const GSVector8i& v) {return xffffffff(v).sll32(14);}
__forceinline static GSVector8i xffffe000(const GSVector8i& v) {return xffffffff(v).sll32(13);}
__forceinline static GSVector8i xfffff000(const GSVector8i& v) {return xffffffff(v).sll32(12);}
__forceinline static GSVector8i xfffff800(const GSVector8i& v) {return xffffffff(v).sll32(11);}
__forceinline static GSVector8i xfffffc00(const GSVector8i& v) {return xffffffff(v).sll32(10);}
__forceinline static GSVector8i xfffffe00(const GSVector8i& v) {return xffffffff(v).sll32( 9);}
__forceinline static GSVector8i xffffff00(const GSVector8i& v) {return xffffffff(v).sll32( 8);}
__forceinline static GSVector8i xffffff80(const GSVector8i& v) {return xffffffff(v).sll32( 7);}
__forceinline static GSVector8i xffffffc0(const GSVector8i& v) {return xffffffff(v).sll32( 6);}
__forceinline static GSVector8i xffffffe0(const GSVector8i& v) {return xffffffff(v).sll32( 5);}
__forceinline static GSVector8i xfffffff0(const GSVector8i& v) {return xffffffff(v).sll32( 4);}
__forceinline static GSVector8i xfffffff8(const GSVector8i& v) {return xffffffff(v).sll32( 3);}
__forceinline static GSVector8i xfffffffc(const GSVector8i& v) {return xffffffff(v).sll32( 2);}
__forceinline static GSVector8i xfffffffe(const GSVector8i& v) {return xffffffff(v).sll32( 1);}
// High-bit mask helpers for 32-bit lanes. Each one takes xffffffff(v) and applies
// sll32 with the listed count; the function name spells the intended per-lane
// constant (e.g. 0xffffffff << 31 == 0x80000000, 0xffffffff << 1 == 0xfffffffe).
// NOTE(review): assumes xffffffff(v) yields all-ones lanes and sll32 is a logical
// left shift per 32-bit lane -- both are defined outside this excerpt; confirm.
__forceinline static GSVector8i x80000000(const GSVector8i& v) { return xffffffff(v).sll32(31); }
__forceinline static GSVector8i xc0000000(const GSVector8i& v) { return xffffffff(v).sll32(30); }
__forceinline static GSVector8i xe0000000(const GSVector8i& v) { return xffffffff(v).sll32(29); }
__forceinline static GSVector8i xf0000000(const GSVector8i& v) { return xffffffff(v).sll32(28); }
__forceinline static GSVector8i xf8000000(const GSVector8i& v) { return xffffffff(v).sll32(27); }
__forceinline static GSVector8i xfc000000(const GSVector8i& v) { return xffffffff(v).sll32(26); }
__forceinline static GSVector8i xfe000000(const GSVector8i& v) { return xffffffff(v).sll32(25); }
__forceinline static GSVector8i xff000000(const GSVector8i& v) { return xffffffff(v).sll32(24); }
__forceinline static GSVector8i xff800000(const GSVector8i& v) { return xffffffff(v).sll32(23); }
__forceinline static GSVector8i xffc00000(const GSVector8i& v) { return xffffffff(v).sll32(22); }
__forceinline static GSVector8i xffe00000(const GSVector8i& v) { return xffffffff(v).sll32(21); }
__forceinline static GSVector8i xfff00000(const GSVector8i& v) { return xffffffff(v).sll32(20); }
__forceinline static GSVector8i xfff80000(const GSVector8i& v) { return xffffffff(v).sll32(19); }
__forceinline static GSVector8i xfffc0000(const GSVector8i& v) { return xffffffff(v).sll32(18); }
__forceinline static GSVector8i xfffe0000(const GSVector8i& v) { return xffffffff(v).sll32(17); }
__forceinline static GSVector8i xffff0000(const GSVector8i& v) { return xffffffff(v).sll32(16); }
__forceinline static GSVector8i xffff8000(const GSVector8i& v) { return xffffffff(v).sll32(15); }
__forceinline static GSVector8i xffffc000(const GSVector8i& v) { return xffffffff(v).sll32(14); }
__forceinline static GSVector8i xffffe000(const GSVector8i& v) { return xffffffff(v).sll32(13); }
__forceinline static GSVector8i xfffff000(const GSVector8i& v) { return xffffffff(v).sll32(12); }
__forceinline static GSVector8i xfffff800(const GSVector8i& v) { return xffffffff(v).sll32(11); }
__forceinline static GSVector8i xfffffc00(const GSVector8i& v) { return xffffffff(v).sll32(10); }
__forceinline static GSVector8i xfffffe00(const GSVector8i& v) { return xffffffff(v).sll32( 9); }
__forceinline static GSVector8i xffffff00(const GSVector8i& v) { return xffffffff(v).sll32( 8); }
__forceinline static GSVector8i xffffff80(const GSVector8i& v) { return xffffffff(v).sll32( 7); }
__forceinline static GSVector8i xffffffc0(const GSVector8i& v) { return xffffffff(v).sll32( 6); }
__forceinline static GSVector8i xffffffe0(const GSVector8i& v) { return xffffffff(v).sll32( 5); }
__forceinline static GSVector8i xfffffff0(const GSVector8i& v) { return xffffffff(v).sll32( 4); }
__forceinline static GSVector8i xfffffff8(const GSVector8i& v) { return xffffffff(v).sll32( 3); }
__forceinline static GSVector8i xfffffffc(const GSVector8i& v) { return xffffffff(v).sll32( 2); }
__forceinline static GSVector8i xfffffffe(const GSVector8i& v) { return xffffffff(v).sll32( 1); }
// Low-bit mask helpers for 16-bit lanes. Each one takes xffffffff(v) and applies
// srl16 with the listed count; the function name spells the intended per-lane
// constant (e.g. 0xffff >> 15 == 0x0001, 0xffff >> 1 == 0x7fff).
// NOTE(review): assumes xffffffff(v) yields all-ones lanes and srl16 is a logical
// right shift per 16-bit lane -- both are defined outside this excerpt; confirm.
__forceinline static GSVector8i x0001(const GSVector8i& v) {return xffffffff(v).srl16(15);}
__forceinline static GSVector8i x0003(const GSVector8i& v) {return xffffffff(v).srl16(14);}
__forceinline static GSVector8i x0007(const GSVector8i& v) {return xffffffff(v).srl16(13);}
__forceinline static GSVector8i x000f(const GSVector8i& v) {return xffffffff(v).srl16(12);}
__forceinline static GSVector8i x001f(const GSVector8i& v) {return xffffffff(v).srl16(11);}
__forceinline static GSVector8i x003f(const GSVector8i& v) {return xffffffff(v).srl16(10);}
__forceinline static GSVector8i x007f(const GSVector8i& v) {return xffffffff(v).srl16( 9);}
__forceinline static GSVector8i x00ff(const GSVector8i& v) {return xffffffff(v).srl16( 8);}
__forceinline static GSVector8i x01ff(const GSVector8i& v) {return xffffffff(v).srl16( 7);}
__forceinline static GSVector8i x03ff(const GSVector8i& v) {return xffffffff(v).srl16( 6);}
__forceinline static GSVector8i x07ff(const GSVector8i& v) {return xffffffff(v).srl16( 5);}
__forceinline static GSVector8i x0fff(const GSVector8i& v) {return xffffffff(v).srl16( 4);}
__forceinline static GSVector8i x1fff(const GSVector8i& v) {return xffffffff(v).srl16( 3);}
__forceinline static GSVector8i x3fff(const GSVector8i& v) {return xffffffff(v).srl16( 2);}
__forceinline static GSVector8i x7fff(const GSVector8i& v) {return xffffffff(v).srl16( 1);}
__forceinline static GSVector8i x0001(const GSVector8i& v) { return xffffffff(v).srl16(15); }
__forceinline static GSVector8i x0003(const GSVector8i& v) { return xffffffff(v).srl16(14); }
__forceinline static GSVector8i x0007(const GSVector8i& v) { return xffffffff(v).srl16(13); }
__forceinline static GSVector8i x000f(const GSVector8i& v) { return xffffffff(v).srl16(12); }
__forceinline static GSVector8i x001f(const GSVector8i& v) { return xffffffff(v).srl16(11); }
__forceinline static GSVector8i x003f(const GSVector8i& v) { return xffffffff(v).srl16(10); }
__forceinline static GSVector8i x007f(const GSVector8i& v) { return xffffffff(v).srl16( 9); }
__forceinline static GSVector8i x00ff(const GSVector8i& v) { return xffffffff(v).srl16( 8); }
__forceinline static GSVector8i x01ff(const GSVector8i& v) { return xffffffff(v).srl16( 7); }
__forceinline static GSVector8i x03ff(const GSVector8i& v) { return xffffffff(v).srl16( 6); }
__forceinline static GSVector8i x07ff(const GSVector8i& v) { return xffffffff(v).srl16( 5); }
__forceinline static GSVector8i x0fff(const GSVector8i& v) { return xffffffff(v).srl16( 4); }
__forceinline static GSVector8i x1fff(const GSVector8i& v) { return xffffffff(v).srl16( 3); }
__forceinline static GSVector8i x3fff(const GSVector8i& v) { return xffffffff(v).srl16( 2); }
__forceinline static GSVector8i x7fff(const GSVector8i& v) { return xffffffff(v).srl16( 1); }
// High-bit mask helpers for 16-bit lanes. Each one takes xffffffff(v) and applies
// sll16 with the listed count; the function name spells the intended per-lane
// constant (e.g. 0xffff << 15 == 0x8000, 0xffff << 1 == 0xfffe, truncated to 16 bits).
// NOTE(review): assumes xffffffff(v) yields all-ones lanes and sll16 is a logical
// left shift per 16-bit lane -- both are defined outside this excerpt; confirm.
__forceinline static GSVector8i x8000(const GSVector8i& v) {return xffffffff(v).sll16(15);}
__forceinline static GSVector8i xc000(const GSVector8i& v) {return xffffffff(v).sll16(14);}
__forceinline static GSVector8i xe000(const GSVector8i& v) {return xffffffff(v).sll16(13);}
__forceinline static GSVector8i xf000(const GSVector8i& v) {return xffffffff(v).sll16(12);}
__forceinline static GSVector8i xf800(const GSVector8i& v) {return xffffffff(v).sll16(11);}
__forceinline static GSVector8i xfc00(const GSVector8i& v) {return xffffffff(v).sll16(10);}
__forceinline static GSVector8i xfe00(const GSVector8i& v) {return xffffffff(v).sll16( 9);}
__forceinline static GSVector8i xff00(const GSVector8i& v) {return xffffffff(v).sll16( 8);}
__forceinline static GSVector8i xff80(const GSVector8i& v) {return xffffffff(v).sll16( 7);}
__forceinline static GSVector8i xffc0(const GSVector8i& v) {return xffffffff(v).sll16( 6);}
__forceinline static GSVector8i xffe0(const GSVector8i& v) {return xffffffff(v).sll16( 5);}
__forceinline static GSVector8i xfff0(const GSVector8i& v) {return xffffffff(v).sll16( 4);}
__forceinline static GSVector8i xfff8(const GSVector8i& v) {return xffffffff(v).sll16( 3);}
__forceinline static GSVector8i xfffc(const GSVector8i& v) {return xffffffff(v).sll16( 2);}
__forceinline static GSVector8i xfffe(const GSVector8i& v) {return xffffffff(v).sll16( 1);}
__forceinline static GSVector8i x8000(const GSVector8i& v) { return xffffffff(v).sll16(15); }
__forceinline static GSVector8i xc000(const GSVector8i& v) { return xffffffff(v).sll16(14); }
__forceinline static GSVector8i xe000(const GSVector8i& v) { return xffffffff(v).sll16(13); }
__forceinline static GSVector8i xf000(const GSVector8i& v) { return xffffffff(v).sll16(12); }
__forceinline static GSVector8i xf800(const GSVector8i& v) { return xffffffff(v).sll16(11); }
__forceinline static GSVector8i xfc00(const GSVector8i& v) { return xffffffff(v).sll16(10); }
__forceinline static GSVector8i xfe00(const GSVector8i& v) { return xffffffff(v).sll16( 9); }
__forceinline static GSVector8i xff00(const GSVector8i& v) { return xffffffff(v).sll16( 8); }
__forceinline static GSVector8i xff80(const GSVector8i& v) { return xffffffff(v).sll16( 7); }
__forceinline static GSVector8i xffc0(const GSVector8i& v) { return xffffffff(v).sll16( 6); }
__forceinline static GSVector8i xffe0(const GSVector8i& v) { return xffffffff(v).sll16( 5); }
__forceinline static GSVector8i xfff0(const GSVector8i& v) { return xffffffff(v).sll16( 4); }
__forceinline static GSVector8i xfff8(const GSVector8i& v) { return xffffffff(v).sll16( 3); }
__forceinline static GSVector8i xfffc(const GSVector8i& v) { return xffffffff(v).sll16( 2); }
__forceinline static GSVector8i xfffe(const GSVector8i& v) { return xffffffff(v).sll16( 1); }
// Table lookups: return entry n of the static tables m_xff / m_x0f.
// No bounds check is performed on n here; the tables' sizes and contents are
// defined outside this excerpt.
__forceinline static GSVector8i xff(int n) {return m_xff[n];}
__forceinline static GSVector8i x0f(int n) {return m_x0f[n];}
__forceinline static GSVector8i xff(int n) { return m_xff[n]; }
__forceinline static GSVector8i x0f(int n) { return m_x0f[n]; }
};
#endif

View File

@ -30,14 +30,14 @@ static void* s_hModule;
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
switch(ul_reason_for_call)
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
s_hModule = hModule;
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
case DLL_PROCESS_ATTACH:
s_hModule = hModule;
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
@ -47,11 +47,14 @@ bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const wchar_t* type)
{
buff.clear();
HRSRC hRsrc = FindResource((HMODULE)s_hModule, MAKEINTRESOURCE(id), type != NULL ? type : (LPWSTR)RT_RCDATA);
if(!hRsrc) return false;
if (!hRsrc)
return false;
HGLOBAL hGlobal = ::LoadResource((HMODULE)s_hModule, hRsrc);
if(!hGlobal) return false;
if (!hGlobal)
return false;
DWORD size = SizeofResource((HMODULE)s_hModule, hRsrc);
if(!size) return false;
if (!size)
return false;
// On Linux resources are always NULL terminated
// Add + 1 on size to do the same for compatibility sake (required by GSDeviceOGL)
buff.resize(size + 1);
@ -66,7 +69,8 @@ bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const wchar_t* type)
bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const char* type)
{
std::string path;
switch (id) {
switch (id)
{
case IDR_COMMON_GLSL:
path = "/GSdx/res/glsl/common_header.glsl";
break;
@ -99,12 +103,13 @@ bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const char* type)
return false;
}
GBytes *bytes = g_resource_lookup_data(GSdx_res_get_resource(), path.c_str(), G_RESOURCE_LOOKUP_FLAGS_NONE, nullptr);
GBytes* bytes = g_resource_lookup_data(GSdx_res_get_resource(), path.c_str(), G_RESOURCE_LOOKUP_FLAGS_NONE, nullptr);
size_t size = 0;
const void* data = g_bytes_get_data(bytes, &size);
if (data == nullptr || size == 0) {
if (data == nullptr || size == 0)
{
printf("Failed to get data for resource: %d\n", id);
return false;
}
@ -125,14 +130,16 @@ size_t GSdxApp::GetIniString(const char* lpAppName, const char* lpKeyName, const
std::string key(lpKeyName);
std::string value = m_configuration_map[key];
if (value.empty()) {
if (value.empty())
{
// save the value for future calls
m_configuration_map[key] = std::string(lpDefault);
strcpy(lpReturnedString, lpDefault);
} else
}
else
strcpy(lpReturnedString, value.c_str());
return 0;
return 0;
}
bool GSdxApp::WriteIniString(const char* lpAppName, const char* lpKeyName, const char* pString, const char* lpFileName)
@ -146,16 +153,19 @@ bool GSdxApp::WriteIniString(const char* lpAppName, const char* lpKeyName, const
// Save config to a file
FILE* f = px_fopen(lpFileName, "w");
if (f == NULL) return false; // FIXME print a nice message
if (f == NULL)
return false; // FIXME print a nice message
// Maintain compatibility with GSDumpGUI/old Windows ini.
#ifdef _WIN32
fprintf(f, "[Settings]\n");
#endif
for (const auto& entry : m_configuration_map) {
for (const auto& entry : m_configuration_map)
{
// Do not save the inifile key which is not an option
if (entry.first.compare("inifile") == 0) continue;
if (entry.first.compare("inifile") == 0)
continue;
// Only keep options that have a default value (this allows purging old options from GSdx.ini)
if (!entry.second.empty() && m_default_configuration.find(entry.first) != m_default_configuration.end())
@ -171,11 +181,13 @@ int GSdxApp::GetIniInt(const char* lpAppName, const char* lpKeyName, int nDefaul
BuildConfigurationMap(lpFileName);
std::string value = m_configuration_map[std::string(lpKeyName)];
if (value.empty()) {
if (value.empty())
{
// save the value for future calls
SetConfig(lpKeyName, nDefault);
return nDefault;
} else
}
else
return atoi(value.c_str());
}
@ -425,10 +437,12 @@ void GSdxApp::Init()
void GSdxApp::ReloadConfig()
{
if (m_configuration_map.empty()) return;
if (m_configuration_map.empty())
return;
auto file = m_configuration_map.find("inifile");
if (file == m_configuration_map.end()) return;
if (file == m_configuration_map.end())
return;
// A map was built so reload it
std::string filename = file->second;
@ -440,7 +454,8 @@ void GSdxApp::BuildConfigurationMap(const char* lpFileName)
{
// Check if the map was already built
std::string inifile_value(lpFileName);
if ( inifile_value.compare(m_configuration_map["inifile"]) == 0 ) return;
if (inifile_value.compare(m_configuration_map["inifile"]) == 0)
return;
m_configuration_map["inifile"] = inifile_value;
// Load config from file
@ -453,7 +468,8 @@ void GSdxApp::BuildConfigurationMap(const char* lpFileName)
return;
std::string line;
while (std::getline(file, line)) {
while (std::getline(file, line))
{
const auto separator = line.find('=');
if (separator == std::string::npos)
continue;
@ -485,7 +501,7 @@ void* GSdxApp::GetModuleHandlePtr()
void GSdxApp::SetConfigDir(const char* dir)
{
if( dir == NULL )
if (dir == NULL)
{
m_ini = "inis/GSdx.ini";
}
@ -493,7 +509,7 @@ void GSdxApp::SetConfigDir(const char* dir)
{
m_ini = dir;
if(m_ini[m_ini.length() - 1] != DIRECTORY_SEPARATOR)
if (m_ini[m_ini.length() - 1] != DIRECTORY_SEPARATOR)
{
m_ini += DIRECTORY_SEPARATOR;
}
@ -507,9 +523,12 @@ std::string GSdxApp::GetConfigS(const char* entry)
char buff[4096] = {0};
auto def = m_default_configuration.find(entry);
if (def != m_default_configuration.end()) {
if (def != m_default_configuration.end())
{
GetIniString(m_section.c_str(), entry, def->second.c_str(), buff, countof(buff), m_ini.c_str());
} else {
}
else
{
fprintf(stderr, "Option %s doesn't have a default value\n", entry);
GetIniString(m_section.c_str(), entry, "", buff, countof(buff), m_ini.c_str());
}
@ -526,9 +545,12 @@ int GSdxApp::GetConfigI(const char* entry)
{
auto def = m_default_configuration.find(entry);
if (def != m_default_configuration.end()) {
if (def != m_default_configuration.end())
{
return GetIniInt(m_section.c_str(), entry, std::stoi(def->second), m_ini.c_str());
} else {
}
else
{
fprintf(stderr, "Option %s doesn't have a default value\n", entry);
return GetIniInt(m_section.c_str(), entry, 0, m_ini.c_str());
}

View File

@ -28,8 +28,8 @@ class GSdxApp
{
std::string m_ini;
std::string m_section;
std::map< std::string, std::string > m_default_configuration;
std::map< std::string, std::string > m_configuration_map;
std::map<std::string, std::string> m_default_configuration;
std::map<std::string, std::string> m_configuration_map;
GSRendererType m_current_renderer_type;
public:
@ -39,7 +39,10 @@ public:
void* GetModuleHandlePtr();
#ifdef _WIN32
// Windows-only (inside #ifdef _WIN32) convenience wrapper: returns the pointer
// from GetModuleHandlePtr() cast to HMODULE.
HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();}
HMODULE GetModuleHandle()
{
return (HMODULE)GetModuleHandlePtr();
}
#endif
void BuildConfigurationMap(const char* lpFileName);
@ -58,10 +61,13 @@ public:
void SetConfig(const char* entry, const char* value);
void SetConfig(const char* entry, int value);
// Typed accessor: reads the option through GetConfigI(entry) and converts the
// resulting int to T with static_cast. The getters carry distinct names
// (GetConfigT / GetConfigI / GetConfigB) to avoid issues with overloading.
template<typename T>
T GetConfigT(const char* entry) { return static_cast<T>(GetConfigI(entry)); }
int GetConfigI(const char* entry);
bool GetConfigB(const char* entry);
template <typename T>
T GetConfigT(const char* entry)
{
return static_cast<T>(GetConfigI(entry));
}
int GetConfigI(const char* entry);
bool GetConfigB(const char* entry);
std::string GetConfigS(const char* entry);
void SetCurrentRendererType(GSRendererType type);
@ -87,8 +93,14 @@ public:
std::vector<GSSetting> m_gs_tv_shaders;
};
// Empty marker types for GSdx errors. GSDXRecoverableError and
// GSDXErrorGlVertexArrayTooSmall both derive from GSDXError and add no members.
// NOTE(review): presumably used as exception types -- confirm at the throw/catch sites.
struct GSDXError {};
struct GSDXRecoverableError : GSDXError {};
struct GSDXErrorGlVertexArrayTooSmall : GSDXError {};
struct GSDXError
{
};
struct GSDXRecoverableError : GSDXError
{
};
struct GSDXErrorGlVertexArrayTooSmall : GSDXError
{
};
extern GSdxApp theApp;

View File

@ -42,7 +42,8 @@ GSDevice::GSDevice()
GSDevice::~GSDevice()
{
for(auto t : m_pool) delete t;
for (auto t : m_pool)
delete t;
delete m_backbuffer;
delete m_merge;
@ -60,7 +61,8 @@ bool GSDevice::Create(const std::shared_ptr<GSWnd>& wnd)
bool GSDevice::Reset(int w, int h)
{
for(auto t : m_pool) delete t;
for (auto t : m_pool)
delete t;
m_pool.clear();
@ -88,9 +90,9 @@ void GSDevice::Present(const GSVector4i& r, int shader)
int w = std::max<int>(cr.width(), 1);
int h = std::max<int>(cr.height(), 1);
if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
if (!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
{
if(!Reset(w, h))
if (!Reset(w, h))
{
return;
}
@ -101,7 +103,7 @@ void GSDevice::Present(const GSVector4i& r, int shader)
// FIXME is it mandatory, it could be slow
ClearRenderTarget(m_backbuffer, 0);
if(m_current)
if (m_current)
{
static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE,
ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER,
@ -123,11 +125,11 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, int format)
{
const GSVector2i size(w, h);
for(auto i = m_pool.begin(); i != m_pool.end(); ++i)
for (auto i = m_pool.begin(); i != m_pool.end(); ++i)
{
GSTexture* t = *i;
if(t->GetType() == type && t->GetFormat() == format && t->GetSize() == size)
if (t->GetType() == type && t->GetFormat() == format && t->GetSize() == size)
{
m_pool.erase(i);
@ -142,7 +144,7 @@ void GSDevice::PrintMemoryUsage()
{
#ifdef ENABLE_OGL_DEBUG
uint32 pool = 0;
for(auto t : m_pool)
for (auto t : m_pool)
{
if (t)
pool += t->GetMemUsage();
@ -161,7 +163,7 @@ void GSDevice::EndScene()
void GSDevice::Recycle(GSTexture* t)
{
if(t)
if (t)
{
#ifdef _DEBUG
// Uncommit saves memory, but it means a future allocation when we want to reuse the texture.
@ -175,7 +177,7 @@ void GSDevice::Recycle(GSTexture* t)
//printf("%d\n",m_pool.size());
while(m_pool.size() > 300)
while (m_pool.size() > 300)
{
delete m_pool.back();
@ -188,7 +190,7 @@ void GSDevice::AgePool()
{
m_frame++;
while(m_pool.size() > 40 && m_frame - m_pool.back()->last_frame_used > 10)
while (m_pool.size() > 40 && m_frame - m_pool.back()->last_frame_used > 10)
{
delete m_pool.back();
@ -199,7 +201,7 @@ void GSDevice::AgePool()
void GSDevice::PurgePool()
{
// OOM emergency. Let's free this useless pool
while(!m_pool.empty())
while (!m_pool.empty())
{
delete m_pool.back();
@ -253,13 +255,13 @@ void GSDevice::Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, con
// (texture appears to be non-null, and is being re-created at a size around like 1700x340,
// dunno if that's relevant) -- air
if(ResizeTarget(&m_merge, fs.x, fs.y))
if (ResizeTarget(&m_merge, fs.x, fs.y))
{
GSTexture* tex[3] = {NULL, NULL, NULL};
for(size_t i = 0; i < countof(tex); i++)
for (size_t i = 0; i < countof(tex); i++)
{
if(sTex[i] != NULL)
if (sTex[i] != NULL)
{
tex[i] = sTex[i];
}
@ -267,9 +269,9 @@ void GSDevice::Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, con
DoMerge(tex, sRect, m_merge, dRect, PMODE, EXTBUF, c);
for(size_t i = 0; i < countof(tex); i++)
for (size_t i = 0; i < countof(tex); i++)
{
if(tex[i] != sTex[i])
if (tex[i] != sTex[i])
{
Recycle(tex[i]);
}
@ -287,13 +289,13 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse
{
ResizeTarget(&m_weavebob, ds.x, ds.y);
if(mode == 0 || mode == 2) // weave or blend
if (mode == 0 || mode == 2) // weave or blend
{
// weave first
DoInterlace(m_merge, m_weavebob, field, false, 0);
if(mode == 2)
if (mode == 2)
{
// blend
@ -308,7 +310,7 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse
m_current = m_weavebob;
}
}
else if(mode == 1) // bob
else if (mode == 1) // bob
{
DoInterlace(m_merge, m_weavebob, 3, true, yoffset * field);
@ -338,7 +340,7 @@ void GSDevice::FXAA()
{
GSVector2i s = m_current->GetSize();
if(ResizeTarget(&m_target_tmp))
if (ResizeTarget(&m_target_tmp))
{
GSVector4 sRect(0, 0, 1, 1);
GSVector4 dRect(0, 0, s.x, s.y);
@ -352,7 +354,7 @@ void GSDevice::ShadeBoost()
{
GSVector2i s = m_current->GetSize();
if(ResizeTarget(&m_target_tmp))
if (ResizeTarget(&m_target_tmp))
{
GSVector4 sRect(0, 0, 1, 1);
GSVector4 dRect(0, 0, s.x, s.y);
@ -364,11 +366,15 @@ void GSDevice::ShadeBoost()
bool GSDevice::ResizeTexture(GSTexture** t, int type, int w, int h)
{
if(t == NULL) {ASSERT(0); return false;}
if (t == NULL)
{
ASSERT(0);
return false;
}
GSTexture* t2 = *t;
if(t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h)
if (t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h)
{
delete t2;
@ -403,7 +409,7 @@ GSAdapter::operator std::string() const
return buf;
}
bool GSAdapter::operator==(const GSAdapter &desc_dxgi) const
bool GSAdapter::operator==(const GSAdapter& desc_dxgi) const
{
return vendor == desc_dxgi.vendor
&& device == desc_dxgi.device
@ -412,7 +418,7 @@ bool GSAdapter::operator==(const GSAdapter &desc_dxgi) const
}
#ifdef _WIN32
GSAdapter::GSAdapter(const DXGI_ADAPTER_DESC1 &desc_dxgi)
GSAdapter::GSAdapter(const DXGI_ADAPTER_DESC1& desc_dxgi)
: vendor(desc_dxgi.VendorId)
, device(desc_dxgi.DeviceId)
, subsys(desc_dxgi.SubSysId)

View File

@ -71,7 +71,7 @@ class MergeConstantBuffer
public:
GSVector4 BGColor;
// Zero-fills the entire object (all sizeof(*this) bytes) on construction.
MergeConstantBuffer() {memset(this, 0, sizeof(*this));}
MergeConstantBuffer() { memset(this, 0, sizeof(*this)); }
};
class InterlaceConstantBuffer
@ -81,7 +81,7 @@ public:
float hH;
float _pad[1];
// Zero-fills the entire object (all sizeof(*this) bytes, including _pad) on construction.
InterlaceConstantBuffer() {memset(this, 0, sizeof(*this));}
InterlaceConstantBuffer() { memset(this, 0, sizeof(*this)); }
};
class ExternalFXConstantBuffer
@ -100,7 +100,7 @@ public:
GSVector4 rcpFrame;
GSVector4 rcpFrameOpt;
// Zero-fills the entire object (all sizeof(*this) bytes) on construction.
FXAAConstantBuffer() {memset(this, 0, sizeof(*this));}
FXAAConstantBuffer() { memset(this, 0, sizeof(*this)); }
};
class ShadeBoostConstantBuffer
@ -109,7 +109,7 @@ public:
GSVector4 rcpFrame;
GSVector4 rcpFrameOpt;
// Zero-fills the entire object (all sizeof(*this) bytes) on construction.
ShadeBoostConstantBuffer() {memset(this, 0, sizeof(*this));}
ShadeBoostConstantBuffer() { memset(this, 0, sizeof(*this)); }
};
#pragma pack(pop)
@ -124,7 +124,10 @@ enum HWBlendFlags
};
// Determines the HW blend function for DX11/OGL.
// Plain aggregate of four uint16 fields: flags, op, src, dst.
// NOTE(review): presumably `flags` holds HWBlendFlags bits and op/src/dst select
// the blend operation and source/destination factors -- confirm against the blend table.
struct HWBlend { uint16 flags, op, src, dst; };
struct HWBlend
{
uint16 flags, op, src, dst;
};
class GSDevice : public GSAlignedClass<32>
{
@ -145,8 +148,8 @@ protected:
OP_ADD, OP_SUBTRACT, OP_REV_SUBTRACT
};
static const int m_NO_BLEND = 0;
static const int m_MERGE_BLEND = m_blendMap.size() - 1;
static const int m_NO_BLEND = 0;
static const int m_MERGE_BLEND = m_blendMap.size() - 1;
std::shared_ptr<GSWnd> m_wnd;
int m_vsync;
@ -157,8 +160,14 @@ protected:
GSTexture* m_blend;
GSTexture* m_target_tmp;
GSTexture* m_current;
struct {size_t stride, start, count, limit;} m_vertex;
struct {size_t start, count, limit;} m_index;
struct
{
size_t stride, start, count, limit;
} m_vertex;
struct
{
size_t start, count, limit;
} m_index;
unsigned int m_frame; // for ageing the pool
bool m_linear_present;
@ -180,16 +189,21 @@ public:
void Recycle(GSTexture* t);
enum {Windowed, Fullscreen, DontCare};
enum
{
Windowed,
Fullscreen,
DontCare
};
virtual bool Create(const std::shared_ptr<GSWnd> &wnd);
virtual bool Create(const std::shared_ptr<GSWnd>& wnd);
virtual bool Reset(int w, int h);
// Base implementation: always returns false and ignores `update`.
// Virtual, so derived devices may override it.
virtual bool IsLost(bool update = false) {return false;}
virtual bool IsLost(bool update = false) { return false; }
virtual void Present(const GSVector4i& r, int shader);
virtual void Present(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader = 0);
virtual void Flip() {}
// Base implementation: stores the requested value in m_vsync and does nothing else.
virtual void SetVSync(int vsync) {m_vsync = vsync;}
virtual void SetVSync(int vsync) { m_vsync = vsync; }
virtual void BeginScene() {}
virtual void DrawPrimitive() {};
@ -212,7 +226,7 @@ public:
GSTexture* CreateTexture(int w, int h, int format = 0);
GSTexture* CreateOffscreen(int w, int h, int format = 0);
// Base implementation: performs no copy, ignores all arguments and returns NULL.
// Virtual, so derived devices may override it.
virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) {return NULL;}
virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) { return NULL; }
virtual void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) {}
virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true) {}
@ -238,7 +252,7 @@ public:
bool ResizeTarget(GSTexture** t, int w, int h);
bool ResizeTarget(GSTexture** t);
// Returns the current value of m_rbswapped.
bool IsRBSwapped() {return m_rbswapped;}
bool IsRBSwapped() { return m_rbswapped; }
void AgePool();
void PurgePool();
@ -260,17 +274,17 @@ struct GSAdapter
operator std::string() const;
bool operator==(const GSAdapter&) const;
// Compares this adapter with a std::string: the adapter is first converted
// through its `operator std::string()` and the two strings are compared.
bool operator==(const std::string &s) const
bool operator==(const std::string& s) const
{
return (std::string)*this == s;
}
// Compares this adapter with a C string: the adapter is first converted
// through its `operator std::string()` and the result is compared with s.
bool operator==(const char *s) const
bool operator==(const char* s) const
{
return (std::string)*this == s;
}
#ifdef _WIN32
GSAdapter(const DXGI_ADAPTER_DESC1 &desc_dxgi);
GSAdapter(const DXGI_ADAPTER_DESC1& desc_dxgi);
#endif
#ifdef __linux__
// TODO

View File

@ -43,7 +43,7 @@ const GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0) const
const GSVector2i src = GSLocalMemory::m_psm[psm].bs;
if(psm != TEX0.PSM)
if (psm != TEX0.PSM)
{
const GSVector2i dst = GSLocalMemory::m_psm[TEX0.PSM].bs;
@ -64,11 +64,11 @@ const GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0) const
const GSVector4i GSDirtyRectList::GetDirtyRectAndClear(const GIFRegTEX0& TEX0, const GSVector2i& size)
{
if(!empty())
if (!empty())
{
GSVector4i r(INT_MAX, INT_MAX, 0, 0);
for(const auto& dirty_rect : *this)
for (const auto& dirty_rect : *this)
{
r = r.runion(dirty_rect.GetDirtyRect(TEX0));
}

View File

@ -23,8 +23,9 @@
#pragma once
// Node stored in FastList<T>'s buffer: the payload plus the buffer indexes of
// the next and previous nodes in the chain. Links are 16-bit indexes, not pointers.
template <class T>
struct Element {
T data;
struct Element
{
T data;
uint16 next_index;
uint16 prev_index;
};
@ -33,8 +34,10 @@ template <class T>
class FastListIterator;
template <class T>
class FastList {
class FastList
{
friend class FastListIterator<T>;
private:
// The index of the first element of the list is m_buffer[0].next_index
// The first Element<T> of the list has prev_index equal to 0
@ -56,16 +59,19 @@ private:
uint16* m_free_indexes_stack;
public:
// Constructs an empty list. m_buffer starts as nullptr, then clear() performs
// the initial setup (capacity 4, auxiliary element at index 0, free-index stack).
__forceinline FastList() {
__forceinline FastList()
{
m_buffer = nullptr;
clear();
}
// Releases the element buffer with _aligned_free.
// NOTE(review): clear() points m_free_indexes_stack at &m_buffer[m_capacity], so the
// free-index stack appears to live in the same allocation -- confirm.
__forceinline ~FastList() {
__forceinline ~FastList()
{
_aligned_free(m_buffer);
}
void clear() {
void clear()
{
// Initialize m_capacity to 4 so we avoid to Grow() on initial insertions
// The code doesn't break if this value is changed with anything from 1 to USHRT_MAX
m_capacity = 4;
@ -77,20 +83,23 @@ public:
m_free_indexes_stack = (uint16*)&m_buffer[m_capacity];
// Initialize m_buffer[0], data field is unused but initialized using default T constructor
m_buffer[0] = { T(), 0, 0 };
m_buffer[0] = {T(), 0, 0};
// m_free_indexes_stack top index is 0, bottom index is m_capacity - 2
m_free_indexes_stack_top = 0;
// m_buffer index 0 is reserved for auxiliary element
for (uint16 i = 0; i < m_capacity - 1; i++) {
for (uint16 i = 0; i < m_capacity - 1; i++)
{
m_free_indexes_stack[i] = i + 1;
}
}
// Insert the element in front of the list and return its position in m_buffer
__forceinline uint16 InsertFront(const T& data) {
if (Full()) {
__forceinline uint16 InsertFront(const T& data)
{
if (Full())
{
Grow();
}
@ -101,81 +110,99 @@ public:
return free_index;
}
// STL-style alias for InsertFront(); the index returned by InsertFront() is discarded.
__forceinline void push_front(const T& data) {
__forceinline void push_front(const T& data)
{
InsertFront(data);
}
// Returns the payload of the last element, i.e. m_buffer[LastIndex()].data.
// No emptiness check: when LastIndex() is 0 this refers to the auxiliary
// element at index 0 rather than a real list entry.
__forceinline const T& back() const {
__forceinline const T& back() const
{
return m_buffer[LastIndex()].data;
}
// Removes the last element by erasing LastIndex().
// No emptiness check is performed here; callers are expected to ensure the list is not empty.
__forceinline void pop_back() {
__forceinline void pop_back()
{
EraseIndex(LastIndex());
}
// Number of elements currently in the list. This is the free-index stack top:
// clear() resets it to 0 and EraseIndex() decrements it.
// NOTE(review): presumably InsertFront() increments it when taking a free index -- confirm.
__forceinline uint16 size() const {
__forceinline uint16 size() const
{
return m_free_indexes_stack_top;
}
// True when the list holds no elements (size() == 0).
__forceinline bool empty() const {
__forceinline bool empty() const
{
return size() == 0;
}
// Removes the element stored at buffer position `index`: unlinks it from the
// chain, then pushes `index` back onto the free-index stack. The pre-decrement
// of the stack top is also what reduces size() by one.
// No validation of `index` is performed here.
__forceinline void EraseIndex(const uint16 index) {
__forceinline void EraseIndex(const uint16 index)
{
ListRemove(index);
m_free_indexes_stack[--m_free_indexes_stack_top] = index;
}
// Moves the element at buffer position `index` to the front of the list by
// unlinking and re-linking it. Does nothing if it is already the first element.
// The element keeps its buffer position; only the links change.
__forceinline void MoveFront(const uint16 index) {
if (FirstIndex() != index) {
__forceinline void MoveFront(const uint16 index)
{
if (FirstIndex() != index)
{
ListRemove(index);
ListInsertFront(index);
}
}
// Iterator positioned on the first element (FirstIndex()).
// For an empty list FirstIndex() is 0, which makes begin() compare equal to end().
__forceinline const FastListIterator<T> begin() const {
__forceinline const FastListIterator<T> begin() const
{
return FastListIterator<T>(this, FirstIndex());
}
// Past-the-end iterator: index 0, the auxiliary element, acts as the sentinel.
__forceinline const FastListIterator<T> end() const {
__forceinline const FastListIterator<T> end() const
{
return FastListIterator<T>(this, 0);
}
// Erases the element `i` points at and returns an iterator to the element that followed it.
// Advancing after the erase is intentional: ListRemove() only rewrites the
// neighbours' links, so the erased slot's own next_index is still readable by ++i.
__forceinline FastListIterator<T> erase(FastListIterator<T> i) {
__forceinline FastListIterator<T> erase(FastListIterator<T> i)
{
EraseIndex(i.Index());
return ++i;
}
private:
// Returns the payload stored at buffer position `index` (no bounds check).
// Accessed by FastListIterator<T> using class friendship
__forceinline const T& Data(const uint16 index) const {
__forceinline const T& Data(const uint16 index) const
{
return m_buffer[index].data;
}
// Returns the buffer position of the element following `index` in the chain.
// Accessed by FastListIterator<T> using class friendship
__forceinline uint16 NextIndex(const uint16 index) const {
__forceinline uint16 NextIndex(const uint16 index) const
{
return m_buffer[index].next_index;
}
// Returns the buffer position of the element preceding `index` in the chain.
// Accessed by FastListIterator<T> using class friendship
__forceinline uint16 PrevIndex(const uint16 index) const {
__forceinline uint16 PrevIndex(const uint16 index) const
{
return m_buffer[index].prev_index;
}
// Buffer position of the first list element: the auxiliary element's next_index.
__forceinline uint16 FirstIndex() const {
__forceinline uint16 FirstIndex() const
{
return m_buffer[0].next_index;
}
// Buffer position of the last list element: the auxiliary element's prev_index.
__forceinline uint16 LastIndex() const {
__forceinline uint16 LastIndex() const
{
return m_buffer[0].prev_index;
}
// True when every usable slot is occupied, i.e. the next insertion needs Grow().
__forceinline bool Full() const {
__forceinline bool Full() const
{
// The minus one is due to the presence of the auxiliary element
return size() == m_capacity - 1;
}
__forceinline void ListInsertFront(const uint16 index) {
__forceinline void ListInsertFront(const uint16 index)
{
// Update prev / next indexes to add m_buffer[index] to the chain
Element<T>& head = m_buffer[0];
m_buffer[index].prev_index = 0;
@ -184,15 +211,18 @@ private:
head.next_index = index;
}
// Unlinks m_buffer[index] from the chain by pointing its two neighbours at each other.
// The removed element's own next_index / prev_index are left untouched, and the
// free-index stack is not modified here (EraseIndex() handles that).
__forceinline void ListRemove(const uint16 index) {
__forceinline void ListRemove(const uint16 index)
{
// Update prev / next indexes to remove m_buffer[index] from the chain
const Element<T>& to_remove = m_buffer[index];
m_buffer[to_remove.prev_index].next_index = to_remove.next_index;
m_buffer[to_remove.next_index].prev_index = to_remove.prev_index;
}
void Grow() {
if (m_capacity == USHRT_MAX) {
void Grow()
{
if (m_capacity == USHRT_MAX)
{
throw std::runtime_error("FastList size maxed out at USHRT_MAX (65535) elements, cannot grow futhermore.");
}
@ -210,7 +240,8 @@ private:
m_free_indexes_stack = new_free_indexes_stack;
// Initialize the additional space in the stack
for (uint16 i = m_capacity - 1; i < new_capacity - 1; i++) {
for (uint16 i = m_capacity - 1; i < new_capacity - 1; i++)
{
m_free_indexes_stack[i] = i + 1;
}
@ -228,50 +259,59 @@ private:
uint16 m_index;
public:
// Builds an iterator referring to slot `index` of `fastlist`.
// Only the pointer is stored; the list itself is not copied.
__forceinline FastListIterator(const FastList<T>* fastlist, const uint16 index)
    : m_fastlist(fastlist)
    , m_index(index)
{
}
// Iterators differ when their slot indexes differ.
// Only m_index is compared; the list pointer is not taken into account.
__forceinline bool operator!=(const FastListIterator<T>& other) const
{
    return m_index != other.m_index;
}
// Iterators are equal when their slot indexes are equal.
// Only m_index is compared; the list pointer is not taken into account.
__forceinline bool operator==(const FastListIterator<T>& other) const
{
    return m_index == other.m_index;
}
// Prefix increment: follows the next_index link of the current slot
// and returns this iterator.
__forceinline const FastListIterator<T>& operator++()
{
    m_index = m_fastlist->NextIndex(m_index);
    return *this;
}
// Postfix increment: advances this iterator and returns a copy of
// its state from before the advance.
__forceinline const FastListIterator<T> operator++(int)
{
    FastListIterator<T> previous(*this);
    ++(*this);
    return previous;
}
// Prefix decrement: follows the prev_index link of the current slot
// and returns this iterator.
__forceinline const FastListIterator<T>& operator--()
{
    m_index = m_fastlist->PrevIndex(m_index);
    return *this;
}
// Postfix decrement: steps this iterator back and returns a copy of
// its state from before the step.
__forceinline const FastListIterator<T> operator--(int)
{
    FastListIterator<T> previous(*this);
    --(*this);
    return previous;
}
// Dereference: read-only access to the element stored in the current slot.
__forceinline const T& operator*() const
{
    return m_fastlist->Data(m_index);
}
// Slot index this iterator currently refers to.
__forceinline uint16 Index() const
{
    return m_index;
}
};

View File

@ -28,7 +28,8 @@
#include "Renderers/SW/GSScanlineEnvironment.h"
template<class KEY, class VALUE> class GSFunctionMap
template <class KEY, class VALUE>
class GSFunctionMap
{
protected:
struct ActivePtr
@ -53,16 +54,17 @@ public:
// Deletes the pointer stored as the mapped value of every m_map_active entry.
// Each pointer must be deleted exactly once.
virtual ~GSFunctionMap()
{
    for (auto& entry : m_map_active)
        delete entry.second;
}
VALUE operator [] (KEY key)
VALUE operator[](KEY key)
{
m_active = NULL;
auto it = m_map_active.find(key);
if(it != m_map_active.end())
if (it != m_map_active.end())
{
m_active = it->second;
}
@ -88,9 +90,9 @@ public:
void UpdateStats(uint64 frame, uint64 ticks, int actual, int total)
{
if(m_active)
if (m_active)
{
if(m_active->frame != frame)
if (m_active->frame != frame)
{
m_active->frame = frame;
m_active->frames++;
@ -108,11 +110,11 @@ public:
{
uint64 ttpf = 0;
for(const auto &i : m_map_active)
for (const auto& i : m_map_active)
{
ActivePtr* p = i.second;
if(p->frames)
if (p->frames)
{
ttpf += p->ticks / p->frames;
}
@ -120,12 +122,12 @@ public:
printf("GS stats\n");
for (const auto &i : m_map_active)
for (const auto& i : m_map_active)
{
KEY key = i.first;
ActivePtr* p = i.second;
if(p->frames && ttpf)
if (p->frames && ttpf)
{
uint64 tpp = p->actual > 0 ? p->ticks / p->actual : 0;
uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
@ -154,7 +156,7 @@ public:
}
};
template<class CG, class KEY, class VALUE>
template <class CG, class KEY, class VALUE>
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{
std::string m_name;
@ -163,7 +165,7 @@ class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
GSCodeBuffer m_cb;
size_t m_total_code_size;
enum {MAX_SIZE = 8192};
enum { MAX_SIZE = 8192 };
public:
GSCodeGeneratorFunctionMap(const char* name, void* param)
@ -186,7 +188,7 @@ public:
auto i = m_cgmap.find(key);
if(i != m_cgmap.end())
if (i != m_cgmap.end())
{
ret = i->second;
}
@ -211,7 +213,7 @@ public:
m_cgmap[key] = ret;
#ifdef ENABLE_VTUNE
#ifdef ENABLE_VTUNE
// vtune method registration
@ -249,7 +251,7 @@ public:
*/
}
#endif
#endif
delete cg;
}

View File

@ -23,20 +23,23 @@
#include "GSdx.h"
#include "GSOsdManager.h"
#ifdef _WIN32
#include "resource.h"
#include "resource.h"
#endif
void GSOsdManager::LoadFont() {
void GSOsdManager::LoadFont()
{
FT_Error error = FT_New_Face(m_library, theApp.GetConfigS("osd_fontname").c_str(), 0, &m_face);
if (error) {
if (error)
{
FT_Error error_load_res = 1;
if(theApp.LoadResource(IDR_FONT_ROBOTO, resource_data_buffer))
if (theApp.LoadResource(IDR_FONT_ROBOTO, resource_data_buffer))
error_load_res = FT_New_Memory_Face(m_library, (const FT_Byte*)resource_data_buffer.data(), resource_data_buffer.size(), 0, &m_face);
if (error_load_res) {
if (error_load_res)
{
m_face = NULL;
fprintf(stderr, "Failed to init freetype face from external and internal resource\n");
if(error == FT_Err_Unknown_File_Format)
if (error == FT_Err_Unknown_File_Format)
fprintf(stderr, "\tFreetype unknown file format for external file\n");
return;
}
@ -45,11 +48,14 @@ void GSOsdManager::LoadFont() {
LoadSize();
}
void GSOsdManager::LoadSize() {
if (!m_face) return;
void GSOsdManager::LoadSize()
{
if (!m_face)
return;
FT_Error error = FT_Set_Pixel_Sizes(m_face, 0, m_size);;
if (error) {
FT_Error error = FT_Set_Pixel_Sizes(m_face, 0, m_size);
if (error)
{
fprintf(stderr, "Failed to init the face size\n");
return;
}
@ -60,11 +66,12 @@ void GSOsdManager::LoadSize() {
m_atlas_h = m_size + 10; // another random guess
}
GSOsdManager::GSOsdManager() : m_atlas_h(0)
, m_atlas_w(0)
, m_max_width(0)
, m_onscreen_messages(0)
, m_texture_dirty(true)
GSOsdManager::GSOsdManager()
: m_atlas_h(0)
, m_atlas_w(0)
, m_max_width(0)
, m_onscreen_messages(0)
, m_texture_dirty(true)
{
m_monitor_enabled = theApp.GetConfigB("osd_monitor_enabled");
m_log_enabled = theApp.GetConfigB("osd_log_enabled");
@ -79,7 +86,8 @@ GSOsdManager::GSOsdManager() : m_atlas_h(0)
m_color = r | (g << 8) | (b << 16) | (255 << 24);
if (FT_Init_FreeType(&m_library)) {
if (FT_Init_FreeType(&m_library))
{
m_face = NULL;
fprintf(stderr, "Failed to init the freetype library\n");
return;
@ -91,24 +99,30 @@ GSOsdManager::GSOsdManager() : m_atlas_h(0)
AddGlyph(' ');
}
// Releases the FreeType library handle held in m_library.
// NOTE(review): the constructor returns early when FT_Init_FreeType fails,
// in which case m_library may not be a valid handle here - confirm.
GSOsdManager::~GSOsdManager()
{
    FT_Done_FreeType(m_library);
}
// Returns the glyph atlas dimensions as (m_atlas_w, m_atlas_h).
GSVector2i GSOsdManager::get_texture_font_size()
{
    return GSVector2i(m_atlas_w, m_atlas_h);
}
void GSOsdManager::upload_texture_atlas(GSTexture* t) {
if (!m_face) return;
void GSOsdManager::upload_texture_atlas(GSTexture* t)
{
if (!m_face)
return;
if (m_char_info.size() > 96) // we only reserved space for this many glyphs
fprintf(stderr, "More than 96 glyphs needed for OSD");
// This can be sped up a bit by only uploading new glyphs
int x = 0;
for(auto &pair : m_char_info) {
if(FT_Load_Char(m_face, pair.first, FT_LOAD_RENDER)) {
for (auto& pair : m_char_info)
{
if (FT_Load_Char(m_face, pair.first, FT_LOAD_RENDER))
{
fprintf(stderr, "failed to load char U%d\n", (int)pair.first);
continue;
}
@ -123,11 +137,12 @@ void GSOsdManager::upload_texture_atlas(GSTexture* t) {
pair.second.bl = m_face->glyph->bitmap_left;
pair.second.bt = m_face->glyph->bitmap_top;
GSVector4i r(x, 0, x+pair.second.bw, pair.second.bh);
GSVector4i r(x, 0, x + pair.second.bw, pair.second.bh);
if (r.width())
t->Update(r, m_face->glyph->bitmap.buffer, m_face->glyph->bitmap.pitch);
if (r.width() > m_max_width) m_max_width = r.width();
if (r.width() > m_max_width)
m_max_width = r.width();
pair.second.tx = (float)x / m_atlas_w;
pair.second.ty = (float)pair.second.bh / m_atlas_h;
@ -139,38 +154,53 @@ void GSOsdManager::upload_texture_atlas(GSTexture* t) {
m_texture_dirty = false;
}
#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 )
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
/* This is dumb in that it doesn't check for malformed UTF8. This function
* is not expected to operate on user input, but only on compiled in strings */
/* Decodes the NUL-terminated UTF-8 string `utf8` into UTF-32 code points.
 *
 * utf8  - source bytes, NUL-terminated.
 * utf32 - destination buffer with room for `size` elements.
 * size  - capacity of `utf32` in elements, terminator included. At most
 *         size - 1 code points are written, followed by a NUL. When size
 *         is 0 nothing is written at all.
 *
 * Bytes that cannot start a sequence, and lead bytes whose continuation
 * bytes are missing, are skipped. Overlong encodings and surrogates are
 * NOT rejected: the input is expected to be compiled-in strings. */
void dumb_utf8_to_utf32(const char* utf8, char32_t* utf32, unsigned size)
{
    if (size == 0)
        return; // no room even for the terminator

    // Keep the final slot free so the output is always NUL-terminated
    char32_t* const last = utf32 + (size - 1);

    while (*utf8 && utf32 != last)
    {
        const unsigned char lead = static_cast<unsigned char>(*utf8);
        unsigned len;
        char32_t cp;

        if ((lead & 0x80) == 0x00)
        {
            len = 1;
            cp = lead;
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            len = 2;
            cp = lead & 0x1F;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            len = 3;
            cp = lead & 0x0F;
        }
        else if ((lead & 0xF8) == 0xF0) // 11110xxx: all three payload bits are free
        {
            len = 4;
            cp = lead & 0x07;
        }
        else
        {
            // Stray continuation byte or invalid lead byte
            ++utf8;
            continue;
        }

        // Every following byte must look like 10xxxxxx. This also stops at
        // the terminating NUL, so a truncated sequence is never read past.
        bool complete = true;
        for (unsigned i = 1; i < len; i++)
        {
            const unsigned char cont = static_cast<unsigned char>(utf8[i]);
            if ((cont & 0xC0) != 0x80)
            {
                complete = false;
                break;
            }
            cp = (cp << 6) | (cont & 0x3F);
        }

        if (!complete)
        {
            ++utf8;
            continue;
        }

        *utf32++ = cp;
        utf8 += len;
    }

    *utf32 = 0;
}
#endif
void GSOsdManager::AddGlyph(char32_t codepoint) {
if (!m_face) return;
if(m_char_info.count(codepoint) == 0) {
void GSOsdManager::AddGlyph(char32_t codepoint)
{
if (!m_face)
return;
if (m_char_info.count(codepoint) == 0)
{
m_texture_dirty = true;
m_char_info[codepoint]; // add it
if(FT_HAS_KERNING(m_face)) {
if (FT_HAS_KERNING(m_face))
{
FT_UInt new_glyph = FT_Get_Char_Index(m_face, codepoint);
for(auto pair : m_char_info) {
for (auto pair : m_char_info)
{
FT_Vector delta;
FT_UInt glyph_index = FT_Get_Char_Index(m_face, pair.first);
@ -181,14 +211,16 @@ void GSOsdManager::AddGlyph(char32_t codepoint) {
}
}
void GSOsdManager::Log(const char *utf8) {
if(!m_log_enabled)
void GSOsdManager::Log(const char* utf8)
{
if (!m_log_enabled)
return;
#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 )
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
char32_t buffer[256];
dumb_utf8_to_utf32(utf8, buffer, countof(buffer));
for(char32_t* c = buffer; *c; ++c) AddGlyph(*c);
for (char32_t* c = buffer; *c; ++c)
AddGlyph(*c);
#else
#if _MSC_VER == 1900
std::wstring_convert<std::codecvt_utf8<unsigned int>, unsigned int> conv;
@ -196,24 +228,28 @@ void GSOsdManager::Log(const char *utf8) {
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
#endif
std::u32string buffer = conv.from_bytes(utf8);
for(auto const &c : buffer) AddGlyph(c);
for (auto const& c : buffer)
AddGlyph(c);
#endif
m_onscreen_messages++;
m_log.push_back(log_info{buffer, std::chrono::system_clock::time_point()});
}
void GSOsdManager::Monitor(const char *key, const char *value) {
if(!m_monitor_enabled)
void GSOsdManager::Monitor(const char* key, const char* value)
{
if (!m_monitor_enabled)
return;
if(value && *value) {
#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 )
if (value && *value)
{
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
char32_t buffer[256], vbuffer[256];
dumb_utf8_to_utf32(key, buffer, countof(buffer));
dumb_utf8_to_utf32(value, vbuffer, countof(vbuffer));
for(char32_t* c = buffer; *c; ++c) AddGlyph(*c);
for(char32_t* c = vbuffer; *c; ++c) AddGlyph(*c);
for (char32_t* c = buffer; *c; ++c)
AddGlyph(*c);
for (char32_t* c = vbuffer; *c; ++c)
AddGlyph(*c);
#else
#if _MSC_VER == 1900
std::wstring_convert<std::codecvt_utf8<unsigned int>, unsigned int> conv;
@ -222,12 +258,16 @@ void GSOsdManager::Monitor(const char *key, const char *value) {
#endif
std::u32string buffer = conv.from_bytes(key);
std::u32string vbuffer = conv.from_bytes(value);
for(auto const &c : buffer) AddGlyph(c);
for(auto const &c : vbuffer) AddGlyph(c);
for (auto const& c : buffer)
AddGlyph(c);
for (auto const& c : vbuffer)
AddGlyph(c);
#endif
m_monitor[buffer] = vbuffer;
} else {
#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 )
}
else
{
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
char32_t buffer[256];
dumb_utf8_to_utf32(key, buffer, countof(buffer));
#else
@ -242,11 +282,12 @@ void GSOsdManager::Monitor(const char *key, const char *value) {
}
}
void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color) {
float x2 = x + g.bl * (2.0f/m_real_size.x);
float y2 = -y - g.bt * (2.0f/m_real_size.y);
float w = g.bw * (2.0f/m_real_size.x);
float h = g.bh * (2.0f/m_real_size.y);
void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color)
{
float x2 = x + g.bl * (2.0f / m_real_size.x);
float y2 = -y - g.bt * (2.0f / m_real_size.y);
float w = g.bw * (2.0f / m_real_size.x);
float h = g.bh * (2.0f / m_real_size.y);
dst->p = GSVector4(x2 , -y2 , 0.0f, 1.0f);
dst->t = GSVector2(g.tx , 0.0f);
@ -274,18 +315,21 @@ void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, fl
++dst;
}
void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color) {
void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color)
{
char32_t p = 0;
for(const auto & c : msg) {
if(p) {
x += m_kern_info[std::make_pair(p, c)] * (2.0f/m_real_size.x);
for (const auto& c : msg)
{
if (p)
{
x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x);
}
RenderGlyph(dst, m_char_info[c], x, y, color);
/* Advance the cursor to the start of the next character */
x += m_char_info[c].ax * (2.0f/m_real_size.x);
y += m_char_info[c].ay * (2.0f/m_real_size.y);
x += m_char_info[c].ax * (2.0f / m_real_size.x);
y += m_char_info[c].ay * (2.0f / m_real_size.y);
dst += 6;
@ -293,36 +337,47 @@ void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, floa
}
}
size_t GSOsdManager::Size() {
size_t GSOsdManager::Size()
{
size_t sum = 0;
if(m_log_enabled) {
if (m_log_enabled)
{
float offset = 0;
for(auto it = m_log.begin(); it != m_log.end(); ++it) {
float y = 1 - ((m_size+2)*(it-m_log.begin()+1)) * (2.0f/m_real_size.y);
if(y + offset < -1) break;
for (auto it = m_log.begin(); it != m_log.end(); ++it)
{
float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y);
if (y + offset < -1)
break;
std::chrono::duration<float> elapsed;
if(it->OnScreen.time_since_epoch().count() == 0) {
if (it->OnScreen.time_since_epoch().count() == 0)
{
elapsed = std::chrono::seconds(0);
} else {
}
else
{
elapsed = std::chrono::system_clock::now() - it->OnScreen;
if(elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) {
if (elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages)
{
continue;
}
}
float ratio = (elapsed - std::chrono::seconds(m_log_timeout/2)).count() / std::chrono::seconds(m_log_timeout/2).count();
ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio;
float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count();
ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f :
ratio;
y += offset += ((m_size+2) * (2.0f/m_real_size.y)) * ratio;
y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio;
sum += it->msg.size();
}
}
if(m_monitor_enabled) {
for(const auto &pair : m_monitor) {
if (m_monitor_enabled)
{
for (const auto& pair : m_monitor)
{
sum += pair.first.size();
sum += pair.second.size();
}
@ -331,17 +386,20 @@ size_t GSOsdManager::Size() {
return sum * 6;
}
float GSOsdManager::StringSize(const std::u32string msg) {
float GSOsdManager::StringSize(const std::u32string msg)
{
char32_t p = 0;
float x = 0.0;
for(auto c : msg) {
if(p) {
x += m_kern_info[std::make_pair(p, c)] * (2.0f/m_real_size.x);
for (auto c : msg)
{
if (p)
{
x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x);
}
/* Advance the cursor to the start of the next character */
x += m_char_info[c].ax * (2.0f/m_real_size.x);
x += m_char_info[c].ax * (2.0f / m_real_size.x);
p = c;
}
@ -349,37 +407,43 @@ float GSOsdManager::StringSize(const std::u32string msg) {
return x;
}
size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count)
{
size_t drawn = 0;
float opacity = m_opacity * 0.01f;
if(m_log_enabled) {
if (m_log_enabled)
{
float offset = 0;
for(auto it = m_log.begin(); it != m_log.end();) {
float x = -1 + 8 * (2.0f/m_real_size.x);
float y = 1 - ((m_size+2)*(it-m_log.begin()+1)) * (2.0f/m_real_size.y);
for (auto it = m_log.begin(); it != m_log.end();)
{
float x = -1 + 8 * (2.0f / m_real_size.x);
float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y);
if(y + offset < -1) break;
if (y + offset < -1)
break;
if(it->OnScreen.time_since_epoch().count() == 0)
if (it->OnScreen.time_since_epoch().count() == 0)
it->OnScreen = std::chrono::system_clock::now();
std::chrono::duration<float> elapsed = std::chrono::system_clock::now() - it->OnScreen;
if(elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) {
if (elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages)
{
m_onscreen_messages--;
it = m_log.erase(it);
continue;
}
if(it->msg.size() * 6 > count - drawn) break;
if (it->msg.size() * 6 > count - drawn)
break;
float ratio = (elapsed - std::chrono::seconds(m_log_timeout/2)).count() / std::chrono::seconds(m_log_timeout/2).count();
float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count();
ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio;
y += offset += ((m_size+2) * (2.0f/m_real_size.y)) * ratio;
y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio;
uint32 color = m_color;
((uint8 *)&color)[3] = (uint8)(((uint8 *)&color)[3] * (1.0f - ratio) * opacity);
((uint8*)&color)[3] = (uint8)(((uint8*)&color)[3] * (1.0f - ratio) * opacity);
RenderString(dst, it->msg, x, y, color);
dst += it->msg.size() * 6;
drawn += it->msg.size() * 6;
@ -387,13 +451,15 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
}
}
if(m_monitor_enabled) {
if (m_monitor_enabled)
{
// pair.first is the key and second is the value and color
// Since the monitor is right justified, but we render from left to right
// we need to find the longest string
float first_max = 0.0, second_max = 0.0;
for(const auto &pair : m_monitor) {
for (const auto& pair : m_monitor)
{
float first_len = StringSize(pair.first);
float second_len = StringSize(pair.second);
@ -402,18 +468,20 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
}
size_t line = 1;
for(const auto &pair : m_monitor) {
if((pair.first.size() + pair.second.size()) * 6 > count - drawn) break;
for (const auto& pair : m_monitor)
{
if ((pair.first.size() + pair.second.size()) * 6 > count - drawn)
break;
// Calculate where to start rendering from by taking the right most position 1.0
// and subtracting (going left) 8 scaled pixels for a margin, then subtracting
// the size of the longest key and subtracting a scaled space and finally
// subtracting the longest value
float x = 1.0f - 8 * (2.0f/m_real_size.x) - first_max - m_char_info[' '].ax * (2.0f/m_real_size.x) - second_max;
float y = -1.0f + ((m_size+2)*(2.0f/m_real_size.y)) * line++;
float x = 1.0f - 8 * (2.0f / m_real_size.x) - first_max - m_char_info[' '].ax * (2.0f / m_real_size.x) - second_max;
float y = -1.0f + ((m_size + 2) * (2.0f / m_real_size.y)) * line++;
uint32 color = m_color;
((uint8 *)&color)[3] = (uint8)(((uint8 *)&color)[3] * opacity);
((uint8*)&color)[3] = (uint8)(((uint8*)&color)[3] * opacity);
// Render the key
RenderString(dst, pair.first, x, y, color);
@ -421,7 +489,7 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
drawn += pair.first.size() * 6;
// Calculate the position for the value
x = 1.0f - 8 * (2.0f/m_real_size.x) - second_max;
x = 1.0f - 8 * (2.0f / m_real_size.x) - second_max;
// Render the value
RenderString(dst, pair.second, x, y, color);
@ -432,4 +500,3 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
return drawn;
}

View File

@ -28,8 +28,10 @@
#include <ft2build.h>
#include FT_FREETYPE_H
class GSOsdManager {
struct glyph_info {
class GSOsdManager
{
struct glyph_info
{
int32 ax; // advance.x
int32 ay; // advance.y
@ -48,15 +50,16 @@ class GSOsdManager {
std::map<std::pair<char32_t, char32_t>, FT_Pos> m_kern_info;
FT_Library m_library;
FT_Face m_face;
FT_UInt m_size;
FT_Face m_face;
FT_UInt m_size;
uint32 m_atlas_h;
uint32 m_atlas_w;
int32 m_max_width;
int32 m_onscreen_messages;
struct log_info {
struct log_info
{
std::u32string msg;
std::chrono::system_clock::time_point OnScreen;
};
@ -76,8 +79,7 @@ class GSOsdManager {
uint32 m_color;
int m_max_onscreen_messages;
public:
public:
GSOsdManager();
~GSOsdManager();
@ -89,14 +91,13 @@ class GSOsdManager {
bool m_texture_dirty;
void upload_texture_atlas(GSTexture* t);
void Log(const char *utf8);
void Monitor(const char *key, const char *value);
void Log(const char* utf8);
void Monitor(const char* key, const char* value);
GSVector2i m_real_size;
size_t Size();
size_t GeneratePrimitives(GSVertexPT1* dst, size_t count);
private:
private:
std::vector<char> resource_data_buffer;
};

View File

@ -35,7 +35,7 @@ GSRenderer::GSRenderer()
, m_shift_key(false)
, m_control_key(false)
, m_texture_shuffle(false)
, m_real_size(0,0)
, m_real_size(0, 0)
, m_wnd()
, m_dev(NULL)
{
@ -67,7 +67,7 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
ASSERT(dev);
ASSERT(!m_dev);
if(!dev->Create(m_wnd))
if (!dev->Create(m_wnd))
{
return false;
}
@ -80,7 +80,8 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
// Calls Reset(1, 1) on the device when one is attached; otherwise does nothing.
void GSRenderer::ResetDevice()
{
    if (!m_dev)
        return;

    m_dev->Reset(1, 1);
}
bool GSRenderer::Merge(int field)
@ -90,14 +91,14 @@ bool GSRenderer::Merge(int field)
GSVector4i fr[2];
GSVector4i dr[2];
GSVector2i display_baseline = { INT_MAX, INT_MAX };
GSVector2i frame_baseline = { INT_MAX, INT_MAX };
GSVector2i display_baseline = {INT_MAX, INT_MAX};
GSVector2i frame_baseline = {INT_MAX, INT_MAX};
for(int i = 0; i < 2; i++)
for (int i = 0; i < 2; i++)
{
en[i] = IsEnabled(i);
if(en[i])
if (en[i])
{
fr[i] = GetFrameRect(i);
dr[i] = GetDisplayRect(i);
@ -111,7 +112,7 @@ bool GSRenderer::Merge(int field)
}
}
if(!en[0] && !en[1])
if (!en[0] && !en[1])
{
return false;
}
@ -128,7 +129,7 @@ bool GSRenderer::Merge(int field)
m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW &&
m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM;
if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/)
if (samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/)
{
// persona 4:
//
@ -173,38 +174,42 @@ bool GSRenderer::Merge(int field)
GSVector2i ds(0, 0);
GSTexture* tex[3] = {NULL, NULL, NULL};
int y_offset[3] = {0, 0, 0};
int y_offset[3] = {0, 0, 0};
s_n++;
bool feedback_merge = m_regs->EXTWRITE.WRITE == 1;
if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge)
if (samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge)
{
tex[0] = GetOutput(0, y_offset[0]);
tex[1] = tex[0]; // saves one texture fetch
tex[0] = GetOutput(0, y_offset[0]);
tex[1] = tex[0]; // saves one texture fetch
y_offset[1] = y_offset[0];
}
else
{
if(en[0]) tex[0] = GetOutput(0, y_offset[0]);
if(en[1]) tex[1] = GetOutput(1, y_offset[1]);
if(feedback_merge) tex[2] = GetFeedbackOutput();
if (en[0])
tex[0] = GetOutput(0, y_offset[0]);
if (en[1])
tex[1] = GetOutput(1, y_offset[1]);
if (feedback_merge)
tex[2] = GetFeedbackOutput();
}
GSVector4 src[2];
GSVector4 src_hw[2];
GSVector4 dst[2];
for(int i = 0; i < 2; i++)
for (int i = 0; i < 2; i++)
{
if(!en[i] || !tex[i]) continue;
if (!en[i] || !tex[i])
continue;
GSVector4i r = fr[i];
GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();
src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy();
src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy();
src_hw[i] = (GSVector4(r) + GSVector4(0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy();
GSVector2 off(0);
GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y);
@ -212,26 +217,26 @@ bool GSRenderer::Merge(int field)
// Time Crisis 2/3 uses two side by side images when in split screen mode.
// Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII
if(display_diff.x > 2)
if (display_diff.x > 2)
{
off.x = tex[i]->GetScale().x * display_diff.x;
}
// If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D
else if(display_diff.x != frame_diff.x)
else if (display_diff.x != frame_diff.x)
{
off.x = tex[i]->GetScale().x * frame_diff.x;
}
if(display_diff.y >= 4) // Shouldn't this be >= 2?
if (display_diff.y >= 4) // Shouldn't this be >= 2?
{
off.y = tex[i]->GetScale().y * display_diff.y;
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
if (m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
{
off.y /= 2;
}
}
else if(display_diff.y != frame_diff.y)
else if (display_diff.y != frame_diff.y)
{
off.y = tex[i]->GetScale().y * frame_diff.y;
}
@ -244,7 +249,7 @@ bool GSRenderer::Merge(int field)
ds = fs;
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
if (m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
{
ds.y *= 2;
}
@ -252,9 +257,9 @@ bool GSRenderer::Merge(int field)
bool slbg = m_regs->PMODE.SLBG;
if(tex[0] || tex[1])
if (tex[0] || tex[1])
{
if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue())
if (tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue())
{
// the two outputs are identical, skip drawing one of them (the one that is alpha blended)
@ -265,9 +270,9 @@ bool GSRenderer::Merge(int field)
m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c);
if(m_regs->SMODE2.INT && m_interlace > 0)
if (m_regs->SMODE2.INT && m_interlace > 0)
{
if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting
if (m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting
{
int field2 = 0;
int mode = 2;
@ -281,17 +286,17 @@ bool GSRenderer::Merge(int field)
}
}
if(m_shadeboost)
if (m_shadeboost)
{
m_dev->ShadeBoost();
}
if(m_shaderfx)
if (m_shaderfx)
{
m_dev->ExternalFX();
}
if(m_fxaa)
if (m_fxaa)
{
m_dev->FXAA();
}
@ -309,7 +314,8 @@ void GSRenderer::SetVSync(int vsync)
{
m_vsync = vsync;
if(m_dev) m_dev->SetVSync(m_vsync);
if (m_dev)
m_dev->SetVSync(m_vsync);
}
void GSRenderer::VSync(int field)
@ -320,14 +326,14 @@ void GSRenderer::VSync(int field)
Flush();
if(s_dump && s_n >= s_saven)
if (s_dump && s_n >= s_saven)
{
m_regs->Dump(root_sw + format("%05d_f%lld_gs_reg.txt", s_n, m_perfmon.GetFrame()));
}
if(!m_dev->IsLost(true))
if (!m_dev->IsLost(true))
{
if(!Merge(field ? 1 : 0))
if (!Merge(field ? 1 : 0))
{
return;
}
@ -341,7 +347,7 @@ void GSRenderer::VSync(int field)
// osd
if((m_perfmon.GetFrame() & 0x1f) == 0)
if ((m_perfmon.GetFrame() & 0x1f) == 0)
{
m_perfmon.Update();
@ -350,9 +356,9 @@ void GSRenderer::VSync(int field)
std::string s;
#ifdef GSTITLEINFO_API_FORCE_VERBOSE
if(1)//force verbose reply
if (1) //force verbose reply
#else
if(m_wnd->IsManaged())
if (m_wnd->IsManaged())
#endif
{
//GSdx owns the window's title, be verbose.
@ -370,18 +376,17 @@ void GSRenderer::VSync(int field)
(int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(),
m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
m_perfmon.Get(GSPerfMon::Unswizzle) / 1024
);
m_perfmon.Get(GSPerfMon::Unswizzle) / 1024);
double fillrate = m_perfmon.Get(GSPerfMon::Fillrate);
if(fillrate > 0)
if (fillrate > 0)
{
s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024));
int sum = 0;
for(int i = 0; i < 16; i++)
for (int i = 0; i < 16; i++)
{
sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i);
}
@ -396,12 +401,12 @@ void GSRenderer::VSync(int field)
s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str());
}
if(m_capture.IsCapturing())
if (m_capture.IsCapturing())
{
s += " | Recording...";
}
if(m_wnd->IsManaged())
if (m_wnd->IsManaged())
{
m_wnd->SetWindowText(s.c_str());
}
@ -426,7 +431,7 @@ void GSRenderer::VSync(int field)
// so let's use actual OSD!
}
if(m_frameskip)
if (m_frameskip)
{
return;
}
@ -443,9 +448,9 @@ void GSRenderer::VSync(int field)
// snapshot
if(!m_snapshot.empty())
if (!m_snapshot.empty())
{
if(!m_dump && m_shift_key)
if (!m_dump && m_shift_key)
{
GSFreezeData fd = {0, nullptr};
Freeze(&fd, true);
@ -457,35 +462,35 @@ void GSRenderer::VSync(int field)
else
m_dump = std::unique_ptr<GSDumpBase>(new GSDumpXz(m_snapshot, m_crc, fd, m_regs));
delete [] fd.data;
delete[] fd.data;
}
if(GSTexture* t = m_dev->GetCurrent())
if (GSTexture* t = m_dev->GetCurrent())
{
t->Save(m_snapshot + ".png");
}
m_snapshot.clear();
}
else if(m_dump)
else if (m_dump)
{
if(m_dump->VSync(field, !m_control_key, m_regs))
if (m_dump->VSync(field, !m_control_key, m_regs))
m_dump.reset();
}
// capture
if(m_capture.IsCapturing())
if (m_capture.IsCapturing())
{
if(GSTexture* current = m_dev->GetCurrent())
if (GSTexture* current = m_dev->GetCurrent())
{
GSVector2i size = m_capture.GetSize();
if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y))
if (GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y))
{
GSTexture::GSMap m;
if(offscreen->Map(m))
if (offscreen->Map(m))
{
m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped());
@ -552,7 +557,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
m_shift_key = !!(::GetAsyncKeyState(VK_SHIFT) & 0x8000);
m_control_key = !!(::GetAsyncKeyState(VK_CONTROL) & 0x8000);
#else
switch(e->key)
switch (e->key)
{
case XK_Shift_L:
case XK_Shift_R:
@ -565,7 +570,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
}
#endif
if(e->type == KEYPRESS)
if (e->type == KEYPRESS)
{
int step = m_shift_key ? -1 : 1;
@ -580,44 +585,43 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
#define VK_HOME XK_Home
#endif
switch(e->key)
switch (e->key)
{
case VK_F5:
m_interlace = (m_interlace + s_interlace_nb + step) % s_interlace_nb;
theApp.SetConfig("interlace", m_interlace);
printf("GSdx: Set deinterlace mode to %d (%s).\n", m_interlace, theApp.m_gs_interlace.at(m_interlace).name.c_str());
return;
case VK_F6:
if( m_wnd->IsManaged() )
m_aspectratio = (m_aspectratio + s_aspect_ratio_nb + step) % s_aspect_ratio_nb;
return;
case VK_DELETE:
m_aa1 = !m_aa1;
theApp.SetConfig("aa1", m_aa1);
printf("GSdx: (Software) Edge anti-aliasing is now %s.\n", m_aa1 ? "enabled" : "disabled");
return;
case VK_INSERT:
m_mipmap = (m_mipmap + s_mipmap_nb + step) % s_mipmap_nb;
theApp.SetConfig("mipmap_hw", m_mipmap);
printf("GSdx: Mipmapping is now %s.\n", theApp.m_gs_hack.at(m_mipmap).name.c_str());
return;
case VK_PRIOR:
m_fxaa = !m_fxaa;
theApp.SetConfig("fxaa", m_fxaa);
printf("GSdx: FXAA anti-aliasing is now %s.\n", m_fxaa ? "enabled" : "disabled");
return;
case VK_HOME:
m_shaderfx = !m_shaderfx;
theApp.SetConfig("shaderfx", m_shaderfx);
printf("GSdx: External post-processing is now %s.\n", m_shaderfx ? "enabled" : "disabled");
return;
case VK_NEXT: // As requested by Prafull, to be removed later
char dither_msg[3][16] = {"disabled", "auto", "auto unscaled"};
m_dithering = (m_dithering+1)%3;
printf("GSdx: Dithering is now %s.\n", dither_msg[m_dithering]);
return;
case VK_F5:
m_interlace = (m_interlace + s_interlace_nb + step) % s_interlace_nb;
theApp.SetConfig("interlace", m_interlace);
printf("GSdx: Set deinterlace mode to %d (%s).\n", m_interlace, theApp.m_gs_interlace.at(m_interlace).name.c_str());
return;
case VK_F6:
if (m_wnd->IsManaged())
m_aspectratio = (m_aspectratio + s_aspect_ratio_nb + step) % s_aspect_ratio_nb;
return;
case VK_DELETE:
m_aa1 = !m_aa1;
theApp.SetConfig("aa1", m_aa1);
printf("GSdx: (Software) Edge anti-aliasing is now %s.\n", m_aa1 ? "enabled" : "disabled");
return;
case VK_INSERT:
m_mipmap = (m_mipmap + s_mipmap_nb + step) % s_mipmap_nb;
theApp.SetConfig("mipmap_hw", m_mipmap);
printf("GSdx: Mipmapping is now %s.\n", theApp.m_gs_hack.at(m_mipmap).name.c_str());
return;
case VK_PRIOR:
m_fxaa = !m_fxaa;
theApp.SetConfig("fxaa", m_fxaa);
printf("GSdx: FXAA anti-aliasing is now %s.\n", m_fxaa ? "enabled" : "disabled");
return;
case VK_HOME:
m_shaderfx = !m_shaderfx;
theApp.SetConfig("shaderfx", m_shaderfx);
printf("GSdx: External post-processing is now %s.\n", m_shaderfx ? "enabled" : "disabled");
return;
case VK_NEXT: // As requested by Prafull, to be removed later
char dither_msg[3][16] = {"disabled", "auto", "auto unscaled"};
m_dithering = (m_dithering + 1) % 3;
printf("GSdx: Dithering is now %s.\n", dither_msg[m_dithering]);
return;
}
}
}

View File

@ -65,11 +65,11 @@ public:
virtual void VSync(int field);
virtual bool MakeSnapshot(const std::string& path);
virtual void KeyEvent(GSKeyEventData* e);
virtual bool CanUpscale() {return false;}
virtual int GetUpscaleMultiplier() {return 1;}
virtual GSVector2i GetCustomResolution() {return GSVector2i(0,0);}
virtual bool CanUpscale() { return false; }
virtual int GetUpscaleMultiplier() { return 1; }
virtual GSVector2i GetCustomResolution() { return GSVector2i(0, 0); }
GSVector2i GetInternalResolution();
void SetAspectRatio(int aspect) {m_aspectratio = aspect;}
void SetAspectRatio(int aspect) { m_aspectratio = aspect; }
void SetVSync(int vsync);
virtual bool BeginCapture(std::string& filename);

View File

@ -35,15 +35,32 @@ protected:
bool m_sparse;
public:
struct GSMap {uint8* bits; int pitch;};
// Descriptor handed by reference to Map() (see the Map(GSMap&, ...) declaration in this class).
// NOTE(review): presumably `bits` points at the first mapped byte and `pitch` is the
// row stride in bytes — confirm against the concrete Map() implementations.
struct GSMap
{
uint8* bits;
int pitch;
};
enum {RenderTarget = 1, DepthStencil, Texture, Offscreen, Backbuffer, SparseRenderTarget, SparseDepthStencil};
// Surface kinds. Numbering starts at 1, so no enumerator has the value 0.
// GSDevice11::CreateSurface switches on these values to pick bind/usage flags.
enum
{
RenderTarget = 1,
DepthStencil,
Texture,
Offscreen,
Backbuffer,
SparseRenderTarget,
SparseDepthStencil
};
public:
GSTexture();
virtual ~GSTexture() {}
virtual operator bool() {ASSERT(0); return false;}
// Base-class conversion to bool: trips ASSERT(0) and then reports false.
// NOTE(review): looks like concrete texture classes are expected to override this — confirm.
virtual operator bool()
{
ASSERT(0);
return false;
}
virtual bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) = 0;
virtual bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) = 0;
@ -52,17 +69,17 @@ public:
virtual bool Save(const std::string& fn) = 0;
virtual uint32 GetID() { return 0; }
GSVector2 GetScale() const {return m_scale;}
void SetScale(const GSVector2& scale) {m_scale = scale;}
GSVector2 GetScale() const { return m_scale; }
void SetScale(const GSVector2& scale) { m_scale = scale; }
int GetWidth() const {return m_size.x;}
int GetHeight() const {return m_size.y;}
GSVector2i GetSize() const {return m_size;}
int GetWidth() const { return m_size.x; }
int GetHeight() const { return m_size.y; }
GSVector2i GetSize() const { return m_size; }
int GetType() const {return m_type;}
int GetFormat() const {return m_format;}
int GetType() const { return m_type; }
int GetFormat() const { return m_format; }
virtual void CommitPages(const GSVector2i& region, bool commit) {};
virtual void CommitPages(const GSVector2i& region, bool commit) {}
void CommitRegion(const GSVector2i& region);
void Commit();
void Uncommit();

View File

@ -34,11 +34,11 @@ struct alignas(32) GSVertex
{
struct
{
GIFRegST ST; // S:0, T:4
GIFRegST ST; // S:0, T:4
GIFRegRGBAQ RGBAQ; // RGBA:8, Q:12
GIFRegXYZ XYZ; // XY:16, Z:20
union {uint32 UV; struct {uint16 U, V;};}; // UV:24
uint32 FOG; // FOG:28
GIFRegXYZ XYZ; // XY:16, Z:20
union { uint32 UV; struct { uint16 U, V; }; }; // UV:24
uint32 FOG; // FOG:28
};
#if _M_SSE >= 0x500
@ -50,11 +50,22 @@ struct alignas(32) GSVertex
GSVertex() = default; // Warning object is potentially used in hot path
#if _M_SSE >= 0x500
GSVertex(const GSVertex& v) {mx = v.mx;}
void operator = (const GSVertex& v) {mx = v.mx;}
GSVertex(const GSVertex& v)
{
mx = v.mx;
}
void operator=(const GSVertex& v) { mx = v.mx; }
#else
GSVertex(const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];}
void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];}
GSVertex(const GSVertex& v)
{
m[0] = v.m[0];
m[1] = v.m[1];
}
void operator=(const GSVertex& v)
{
m[0] = v.m[0];
m[1] = v.m[1];
}
#endif
};
@ -68,7 +79,7 @@ struct alignas(32) GSVertexPT1
GSVector4 p;
GSVector2 t;
char pad[4];
union {uint32 c; struct {uint8 r, g, b, a;};};
union { uint32 c; struct { uint8 r, g, b, a; }; };
};
struct GSVertexPT2

View File

@ -21,7 +21,8 @@
#pragma once
template <class Vertex> class GSVertexList
template <class Vertex>
class GSVertexList
{
void* m_base;
Vertex* m_v[3];
@ -33,7 +34,7 @@ public:
{
m_base = _aligned_malloc(sizeof(Vertex) * countof(m_v), 32);
for(size_t i = 0; i < countof(m_v); i++)
for (size_t i = 0; i < countof(m_v); i++)
{
m_v[i] = &((Vertex*)m_base)[i];
}
@ -58,13 +59,13 @@ public:
__forceinline void RemoveAt(int pos, int keep)
{
if(keep == 1)
if (keep == 1)
{
Vertex* tmp = m_v[pos + 0];
m_v[pos + 0] = m_v[pos + 1];
m_v[pos + 1] = tmp;
}
else if(keep == 2)
else if (keep == 2)
{
Vertex* tmp = m_v[pos + 0];
m_v[pos + 0] = m_v[pos + 1];

View File

@ -68,7 +68,8 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
// Potential float overflow detected. Better uses the slower division instead
// Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number
// that feel big enough.
if (!fst && !m_accurate_stq && m_min.t.z > 1e30) {
if (!fst && !m_accurate_stq && m_min.t.z > 1e30)
{
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
m_accurate_stq = true;
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
@ -79,18 +80,19 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
m_alpha.valid = false;
// I'm not sure of the cost. In doubt let's do it only when depth is enabled
if(m_state->m_context->TEST.ZTE == 1 && m_state->m_context->TEST.ZTST > ZTST_ALWAYS) {
if (m_state->m_context->TEST.ZTE == 1 && m_state->m_context->TEST.ZTST > ZTST_ALWAYS)
{
CorrectDepthTrace(vertex, v_count);
}
if(m_state->PRIM->TME)
if (m_state->PRIM->TME)
{
const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;
m_filter.mmag = TEX1.IsMagLinear();
m_filter.mmin = TEX1.IsMinLinear();
if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
if (TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
{
m_filter.linear = m_filter.mmag;
}
@ -98,13 +100,18 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
{
float K = (float)TEX1.K / 16;
if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated
if (TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated
{
// LOD = log2(1/|Q|) * (1 << L) + K
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K);
if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;}
if (m_lod.x > m_lod.y)
{
float tmp = m_lod.x;
m_lod.x = m_lod.y;
m_lod.y = tmp;
}
}
else
{
@ -112,11 +119,11 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
m_lod.y = K;
}
if(m_lod.y <= 0)
if (m_lod.y <= 0)
{
m_filter.linear = m_filter.mmag;
}
else if(m_lod.x > 0)
else if (m_lod.x > 0)
{
m_filter.linear = m_filter.mmin;
}
@ -149,25 +156,25 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
}
}
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
{
const GSDrawingContext* context = m_state->m_context;
int n = 1;
switch(primclass)
switch (primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
GSVector4 tmin = s_minmax.xxxx();
@ -180,21 +187,21 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
const GSVertex* RESTRICT v = (GSVertex*)vertex;
for(int i = 0; i < count; i += n)
for (int i = 0; i < count; i += n)
{
if(primclass == GS_POINT_CLASS)
if (primclass == GS_POINT_CLASS)
{
GSVector4i c(v[index[i]].m[0]);
if(color)
if (color)
{
cmin = cmin.min_u8(c);
cmax = cmax.max_u8(c);
}
if(tme)
if (tme)
{
if(!fst)
if (!fst)
{
GSVector4 stq = GSVector4::cast(c);
@ -229,14 +236,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
pmin = pmin.min_u32(p);
pmax = pmax.max_u32(p);
}
else if(primclass == GS_LINE_CLASS)
else if (primclass == GS_LINE_CLASS)
{
GSVector4i c0(v[index[i + 0]].m[0]);
GSVector4i c1(v[index[i + 1]].m[0]);
if(color)
if (color)
{
if(iip)
if (iip)
{
cmin = cmin.min_u8(c0.min_u8(c1));
cmax = cmax.max_u8(c0.max_u8(c1));
@ -248,14 +255,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
}
}
if(tme)
if (tme)
{
if(!fst)
if (!fst)
{
GSVector4 stq0 = GSVector4::cast(c0);
GSVector4 stq1 = GSVector4::cast(c1);
if(accurate_stq)
if (accurate_stq)
{
GSVector4 q = stq0.wwww(stq1);
@ -300,15 +307,15 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
pmin = pmin.min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p0.max_u32(p1));
}
else if(primclass == GS_TRIANGLE_CLASS)
else if (primclass == GS_TRIANGLE_CLASS)
{
GSVector4i c0(v[index[i + 0]].m[0]);
GSVector4i c1(v[index[i + 1]].m[0]);
GSVector4i c2(v[index[i + 2]].m[0]);
if(color)
if (color)
{
if(iip)
if (iip)
{
cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1));
cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1));
@ -320,15 +327,15 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
}
}
if(tme)
if (tme)
{
if(!fst)
if (!fst)
{
GSVector4 stq0 = GSVector4::cast(c0);
GSVector4 stq1 = GSVector4::cast(c1);
GSVector4 stq2 = GSVector4::cast(c2);
if(accurate_stq)
if (accurate_stq)
{
GSVector4 q = stq0.wwww(stq1).xzww(stq2);
@ -381,14 +388,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));
}
else if(primclass == GS_SPRITE_CLASS)
else if (primclass == GS_SPRITE_CLASS)
{
GSVector4i c0(v[index[i + 0]].m[0]);
GSVector4i c1(v[index[i + 1]].m[0]);
if(color)
if (color)
{
if(iip)
if (iip)
{
cmin = cmin.min_u8(c0.min_u8(c1));
cmax = cmax.max_u8(c0.max_u8(c1));
@ -400,14 +407,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
}
}
if(tme)
if (tme)
{
if(!fst)
if (!fst)
{
GSVector4 stq0 = GSVector4::cast(c0);
GSVector4 stq1 = GSVector4::cast(c1);
if(accurate_stq)
if (accurate_stq)
{
GSVector4 q = stq1.wwww();
@ -468,9 +475,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
m_min.p = (GSVector4(pmin) - o) * s;
m_max.p = (GSVector4(pmax) - o) * s;
if(tme)
if (tme)
{
if(fst)
if (fst)
{
s = GSVector4(1.0f / 16, 1.0f).xxyy();
}
@ -488,7 +495,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
m_max.t = GSVector4::zero();
}
if(color)
if (color)
{
m_min.c = cmin.zzzz().u8to32();
m_max.c = cmax.zzzz().u8to32();
@ -518,21 +525,29 @@ void GSVertexTrace::CorrectDepthTrace(const void* vertex, int count)
uint32 z = v[0].XYZ.Z;
// ought to check only 1/2 for sprite
if (z & 1) {
if (z & 1)
{
// Check that first bit is always 1
for (int i = 0; i < count; i++) {
for (int i = 0; i < count; i++)
{
z &= v[i].XYZ.Z;
}
} else {
}
else
{
// Check that first bit is always 0
for (int i = 0; i < count; i++) {
for (int i = 0; i < count; i++)
{
z |= v[i].XYZ.Z;
}
}
if (z == v[0].XYZ.Z) {
if (z == v[0].XYZ.Z)
{
m_eq.z = 1;
} else {
}
else
{
m_eq.z = 0;
}
}

View File

@ -34,8 +34,16 @@ class alignas(32) GSVertexTrace : public GSAlignedClass<32>
BiFiltering m_force_filter;
public:
struct Vertex {GSVector4i c; GSVector4 p, t;};
struct VertexAlpha {int min, max; bool valid;};
// Bundle of one integer vector (`c`) and two float vectors (`p`, `t`).
// NOTE(review): FindMinMax stores results into m_min/m_max via .c, .p and .t
// (colour, position and texture coordinates by the look of it); presumably those
// members are of this type — confirm against the member declarations.
struct Vertex
{
GSVector4i c;
GSVector4 p, t;
};
// Integer range plus a validity flag.
// NOTE(review): Update() resets m_alpha.valid to false; presumably m_alpha is of this
// type and min/max are only meaningful while `valid` is true — confirm with the code
// that fills it in.
struct VertexAlpha
{
int min, max;
bool valid;
};
bool m_accurate_stq;
protected:
@ -47,7 +55,7 @@ protected:
FindMinMaxPtr m_fmm[2][2][2][2][2][4];
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color, uint32 accurate_stq>
void FindMinMax(const void* vertex, const uint32* index, int count);
public:
@ -60,13 +68,13 @@ public:
union
{
uint32 value;
struct {uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1;};
struct {uint32 rgba:16, xyzf:4, stq:4;};
struct { uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1; };
struct { uint32 rgba:16, xyzf:4, stq:4; };
} m_eq;
union
{
struct {uint32 mmag:1, mmin:1, linear:1, opt_linear:1;};
struct { uint32 mmag:1, mmin:1, linear:1, opt_linear:1; };
} m_filter;
GSVector2 m_lod; // x = min, y = max
@ -77,8 +85,8 @@ public:
void Update(const void* vertex, const uint32* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
bool IsLinear() const {return m_filter.opt_linear;}
bool IsRealLinear() const {return m_filter.linear;}
bool IsLinear() const { return m_filter.opt_linear; }
bool IsRealLinear() const { return m_filter.linear; }
void CorrectDepthTrace(const void* vertex, int count);
};

View File

@ -54,40 +54,40 @@ bool GSDevice11::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode)
switch (level)
{
case D3D_FEATURE_LEVEL_10_0:
m_shader.model = "0x400";
m_shader.vs = "vs_4_0";
m_shader.gs = "gs_4_0";
m_shader.ps = "ps_4_0";
m_shader.cs = "cs_4_0";
break;
case D3D_FEATURE_LEVEL_10_1:
m_shader.model = "0x401";
m_shader.vs = "vs_4_1";
m_shader.gs = "gs_4_1";
m_shader.ps = "ps_4_1";
m_shader.cs = "cs_4_1";
break;
case D3D_FEATURE_LEVEL_11_0:
m_shader.model = "0x500";
m_shader.vs = "vs_5_0";
m_shader.gs = "gs_5_0";
m_shader.ps = "ps_5_0";
m_shader.cs = "cs_5_0";
break;
default:
ASSERT(0);
return false;
case D3D_FEATURE_LEVEL_10_0:
m_shader.model = "0x400";
m_shader.vs = "vs_4_0";
m_shader.gs = "gs_4_0";
m_shader.ps = "ps_4_0";
m_shader.cs = "cs_4_0";
break;
case D3D_FEATURE_LEVEL_10_1:
m_shader.model = "0x401";
m_shader.vs = "vs_4_1";
m_shader.gs = "gs_4_1";
m_shader.ps = "ps_4_1";
m_shader.cs = "cs_4_1";
break;
case D3D_FEATURE_LEVEL_11_0:
m_shader.model = "0x500";
m_shader.vs = "vs_5_0";
m_shader.gs = "gs_5_0";
m_shader.ps = "ps_5_0";
m_shader.cs = "cs_5_0";
break;
default:
ASSERT(0);
return false;
}
return true;
}
bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
bool GSDevice11::Create(const std::shared_ptr<GSWnd>& wnd)
{
bool nvidia_vendor = false;
if(!__super::Create(wnd))
if (!__super::Create(wnd))
{
return false;
}
@ -160,8 +160,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
const HRESULT result = D3D11CreateDevice(
adapter, driver_type, nullptr, flags,
supported_levels.data(), supported_levels.size(),
D3D11_SDK_VERSION, &m_dev, &level, &m_ctx
);
D3D11_SDK_VERSION, &m_dev, &level, &m_ctx);
if (FAILED(result))
{
@ -189,8 +188,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
const HRESULT result = m_factory->CreateSwapChainForHwnd(
m_dev, reinterpret_cast<HWND>(m_wnd->GetHandle()),
&swapchain_description, nullptr, nullptr, &m_swapchain
);
&swapchain_description, nullptr, nullptr, &m_swapchain);
if (FAILED(result))
{
@ -199,7 +197,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
}
}
if(!SetFeatureLevel(level, true))
if (!SetFeatureLevel(level, true))
return false;
// Set maximum texture size limit based on supported feature level.
@ -208,7 +206,8 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
else
m_d3d_texsize = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
{ // HACK: check nVIDIA
{
// HACK: check nVIDIA
// Note: It can cause issues on several games such as SOTC, Fatal Frame, plus it adds border offset.
bool disable_safe_features = theApp.GetConfigB("UserHacks") && theApp.GetConfigB("UserHacks_Disable_Safe_Features");
m_hack_topleft_offset = (m_upscale_multiplier != 1 && nvidia_vendor && !disable_safe_features) ? -0.01f : 0.0f;
@ -256,9 +255,9 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
D3D_SHADER_MACRO* sm_convert_ptr = sm_convert.GetPtr();
for(size_t i = 0; i < countof(m_convert.ps); i++)
for (size_t i = 0; i < countof(m_convert.ps); i++)
{
CreateShader(shader, "convert.fx", nullptr, format("ps_main%d", i).c_str(), sm_convert_ptr, & m_convert.ps[i]);
CreateShader(shader, "convert.fx", nullptr, format("ps_main%d", i).c_str(), sm_convert_ptr, &m_convert.ps[i]);
}
memset(&dsd, 0, sizeof(dsd));
@ -288,7 +287,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
hr = m_dev->CreateBuffer(&bd, NULL, &m_merge.cb);
theApp.LoadResource(IDR_MERGE_FX, shader);
for(size_t i = 0; i < countof(m_merge.ps); i++)
for (size_t i = 0; i < countof(m_merge.ps); i++)
{
CreateShader(shader, "merge.fx", nullptr, format("ps_main%d", i).c_str(), sm_model.GetPtr(), &m_merge.ps[i]);
}
@ -317,7 +316,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
hr = m_dev->CreateBuffer(&bd, NULL, &m_interlace.cb);
theApp.LoadResource(IDR_INTERLACE_FX, shader);
for(size_t i = 0; i < countof(m_interlace.ps); i++)
for (size_t i = 0; i < countof(m_interlace.ps); i++)
{
CreateShader(shader, "interlace.fx", nullptr, format("ps_main%d", i).c_str(), sm_model.GetPtr(), &m_interlace.ps[i]);
}
@ -435,18 +434,17 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
GSVector2i tex_font = m_osd.get_texture_font_size();
m_font = std::unique_ptr<GSTexture>(
CreateSurface(GSTexture::Texture, tex_font.x, tex_font.y, DXGI_FORMAT_R8_UNORM)
);
CreateSurface(GSTexture::Texture, tex_font.x, tex_font.y, DXGI_FORMAT_R8_UNORM));
return true;
}
bool GSDevice11::Reset(int w, int h)
{
if(!__super::Reset(w, h))
if (!__super::Reset(w, h))
return false;
if(m_swapchain)
if (m_swapchain)
{
DXGI_SWAP_CHAIN_DESC scd;
@ -457,7 +455,7 @@ bool GSDevice11::Reset(int w, int h)
CComPtr<ID3D11Texture2D> backbuffer;
if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer)))
if (FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer)))
{
return false;
}
@ -553,13 +551,15 @@ void GSDevice11::DrawIndexedPrimitive(int offset, int count)
// Clears render target `t` to the colour `c`. Does nothing when `t` is null.
// The null guard appears twice in this listing (one-line and two-line spelling);
// both forms are equivalent.
void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
{
if (!t) return;
if (!t)
return;
// NOTE(review): relies on GSTexture11 converting implicitly to the view type that
// ClearRenderTargetView expects — the conversion is defined outside this view.
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v);
}
void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
{
if (!t) return;
if (!t)
return;
GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255);
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v);
@ -567,13 +567,15 @@ void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
// Clears the depth part of depth-stencil texture `t` to 0.0f. Does nothing when `t` is null.
// The null guard appears twice in this listing (one-line and two-line spelling);
// both forms are equivalent.
void GSDevice11::ClearDepth(GSTexture* t)
{
if (!t) return;
if (!t)
return;
// Only D3D11_CLEAR_DEPTH is requested, so the trailing stencil argument (0) is not applied.
m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_DEPTH, 0.0f, 0);
}
// Clears the stencil part of depth-stencil texture `t` to the value `c`.
// Does nothing when `t` is null. The null guard appears twice in this listing
// (one-line and two-line spelling); both forms are equivalent.
void GSDevice11::ClearStencil(GSTexture* t, uint8 c)
{
if (!t) return;
if (!t)
return;
// Only D3D11_CLEAR_STENCIL is requested, so the depth argument (0) is not applied.
m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_STENCIL, 0, c);
}
@ -597,24 +599,24 @@ GSTexture* GSDevice11::CreateSurface(int type, int w, int h, int format)
// mipmap = m_mipmap > 1 || m_filter != TriFiltering::None;
bool mipmap = m_mipmap > 1;
int layers = mipmap && format == DXGI_FORMAT_R8G8B8A8_UNORM ? (int)log2(std::max(w,h)) : 1;
int layers = mipmap && format == DXGI_FORMAT_R8G8B8A8_UNORM ? (int)log2(std::max(w, h)) : 1;
switch(type)
switch (type)
{
case GSTexture::RenderTarget:
desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
break;
case GSTexture::DepthStencil:
desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE;
break;
case GSTexture::Texture:
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc.MipLevels = layers;
break;
case GSTexture::Offscreen:
desc.Usage = D3D11_USAGE_STAGING;
desc.CPUAccessFlags |= D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
break;
case GSTexture::RenderTarget:
desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
break;
case GSTexture::DepthStencil:
desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE;
break;
case GSTexture::Texture:
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc.MipLevels = layers;
break;
case GSTexture::Offscreen:
desc.Usage = D3D11_USAGE_STAGING;
desc.CPUAccessFlags |= D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
break;
}
GSTexture11* t = NULL;
@ -623,18 +625,18 @@ GSTexture* GSDevice11::CreateSurface(int type, int w, int h, int format)
hr = m_dev->CreateTexture2D(&desc, NULL, &texture);
if(SUCCEEDED(hr))
if (SUCCEEDED(hr))
{
t = new GSTexture11(texture);
switch(type)
switch (type)
{
case GSTexture::RenderTarget:
ClearRenderTarget(t, 0);
break;
case GSTexture::DepthStencil:
ClearDepth(t);
break;
case GSTexture::RenderTarget:
ClearRenderTarget(t, 0);
break;
case GSTexture::DepthStencil:
ClearDepth(t);
break;
}
}
else
@ -657,14 +659,14 @@ GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int
{
GSTexture* dst = NULL;
if(format == 0)
if (format == 0)
{
format = DXGI_FORMAT_R8G8B8A8_UNORM;
}
ASSERT(format == DXGI_FORMAT_R8G8B8A8_UNORM || format == DXGI_FORMAT_R16_UINT || format == DXGI_FORMAT_R32_UINT);
if(GSTexture* rt = CreateRenderTarget(w, h, format))
if (GSTexture* rt = CreateRenderTarget(w, h, format))
{
GSVector4 dRect(0, 0, w, h);
@ -672,7 +674,7 @@ GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int
dst = CreateOffscreen(w, h, format);
if(dst)
if (dst)
{
m_ctx->CopyResource(*(GSTexture11*)dst, *(GSTexture11*)rt);
}
@ -691,7 +693,7 @@ void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r)
return;
}
D3D11_BOX box = { (UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U };
D3D11_BOX box = {(UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U};
// DX api isn't happy if we pass a box for depth copy
// It complains that depth/multisample must be a full copy
@ -750,16 +752,16 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[ShaderConvert_COPY], nullptr, bs, false);
}
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs , bool linear)
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear)
{
if(!sTex || !dTex)
if (!sTex || !dTex)
{
ASSERT(0);
return;
}
bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24] ||
ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]);
bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24]
|| ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]);
BeginScene();
@ -837,7 +839,8 @@ void GSDevice11::RenderOsd(GSTexture* dt)
OMSetBlendState(m_merge.bs, 0);
OMSetRenderTargets(dt, NULL);
if(m_osd.m_texture_dirty) {
if (m_osd.m_texture_dirty)
{
m_osd.upload_texture_atlas(m_font.get());
}
@ -876,12 +879,12 @@ void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex,
ClearRenderTarget(dTex, c);
if(sTex[1] && !slbg)
if (sTex[1] && !slbg)
{
StretchRect(sTex[1], sRect[1], dTex, dRect[1], m_merge.ps[0], NULL, true);
}
if(sTex[0])
if (sTex[0])
{
m_ctx->UpdateSubresource(m_merge.cb, 0, NULL, &c, 0, 0);
@ -911,7 +914,8 @@ void GSDevice11::InitExternalFX()
{
if (!ExShader_Compiled)
{
try {
try
{
std::string config_name(theApp.GetConfigS("shaderfx_conf"));
std::ifstream fconfig(config_name);
std::stringstream shader;
@ -935,7 +939,8 @@ void GSDevice11::InitExternalFX()
fprintf(stderr, "GSdx: External shader '%s' not loaded and will be disabled!\n", shader_name.c_str());
}
}
catch (GSDXRecoverableError) {
catch (GSDXRecoverableError)
{
printf("GSdx: failed to compile external post-processing shader. \n");
}
ExShader_Compiled = true;
@ -968,13 +973,15 @@ void GSDevice11::InitFXAA()
{
if (!FXAA_Compiled)
{
try {
try
{
std::vector<char> shader;
theApp.LoadResource(IDR_FXAA_FX, shader);
ShaderMacro sm(m_shader.model);
CreateShader(shader, "fxaa.fx", nullptr, "ps_main", sm.GetPtr(), &m_fxaa.ps);
}
catch (GSDXRecoverableError) {
catch (GSDXRecoverableError)
{
printf("GSdx: failed to compile fxaa shader.\n");
}
FXAA_Compiled = true;
@ -1066,7 +1073,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
{
void* ptr = NULL;
if(IAMapVertexBuffer(&ptr, stride, count))
if (IAMapVertexBuffer(&ptr, stride, count))
{
GSVector4i::storent(ptr, vertex, count * stride);
@ -1078,7 +1085,7 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
{
ASSERT(m_vertex.count == 0);
if(count * stride > m_vertex.limit * m_vertex.stride)
if (count * stride > m_vertex.limit * m_vertex.stride)
{
m_vb_old = m_vb;
m_vb = NULL;
@ -1087,7 +1094,7 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
m_vertex.limit = std::max<int>(count * 3 / 2, 11000);
}
if(m_vb == NULL)
if (m_vb == NULL)
{
D3D11_BUFFER_DESC bd;
@ -1102,12 +1109,13 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
hr = m_dev->CreateBuffer(&bd, NULL, &m_vb);
if(FAILED(hr)) return false;
if (FAILED(hr))
return false;
}
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
if (m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
{
m_vertex.start = 0;
@ -1116,7 +1124,7 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
D3D11_MAPPED_SUBRESOURCE m;
if(FAILED(m_ctx->Map(m_vb, 0, type, 0, &m)))
if (FAILED(m_ctx->Map(m_vb, 0, type, 0, &m)))
{
return false;
}
@ -1138,7 +1146,7 @@ void GSDevice11::IAUnmapVertexBuffer()
void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
{
if(m_state.vb != vb || m_state.vb_stride != stride)
if (m_state.vb != vb || m_state.vb_stride != stride)
{
m_state.vb = vb;
m_state.vb_stride = stride;
@ -1154,7 +1162,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
{
ASSERT(m_index.count == 0);
if(count > m_index.limit)
if (count > m_index.limit)
{
m_ib_old = m_ib;
m_ib = NULL;
@ -1163,7 +1171,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
m_index.limit = std::max<int>(count * 3 / 2, 11000);
}
if(m_ib == NULL)
if (m_ib == NULL)
{
D3D11_BUFFER_DESC bd;
@ -1178,12 +1186,13 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
hr = m_dev->CreateBuffer(&bd, NULL, &m_ib);
if(FAILED(hr)) return;
if (FAILED(hr))
return;
}
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
if(m_index.start + count > m_index.limit)
if (m_index.start + count > m_index.limit)
{
m_index.start = 0;
@ -1192,7 +1201,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
D3D11_MAPPED_SUBRESOURCE m;
if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m)))
if (SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m)))
{
memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32));
@ -1206,7 +1215,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib)
{
if(m_state.ib != ib)
if (m_state.ib != ib)
{
m_state.ib = ib;
@ -1216,7 +1225,7 @@ void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib)
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{
if(m_state.layout != layout)
if (m_state.layout != layout)
{
m_state.layout = layout;
@ -1226,7 +1235,7 @@ void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology)
{
if(m_state.topology != topology)
if (m_state.topology != topology)
{
m_state.topology = topology;
@ -1236,14 +1245,14 @@ void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology)
void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb)
{
if(m_state.vs != vs)
if (m_state.vs != vs)
{
m_state.vs = vs;
m_ctx->VSSetShader(vs, NULL, 0);
}
if(m_state.vs_cb != vs_cb)
if (m_state.vs_cb != vs_cb)
{
m_state.vs_cb = vs_cb;
@ -1253,7 +1262,7 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb)
void GSDevice11::GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb)
{
if(m_state.gs != gs)
if (m_state.gs != gs)
{
m_state.gs = gs;
@ -1273,7 +1282,7 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
PSSetShaderResource(0, sr0);
PSSetShaderResource(1, sr1);
for(size_t i = 2; i < m_state.ps_sr_views.size(); i++)
for (size_t i = 2; i < m_state.ps_sr_views.size(); i++)
{
PSSetShaderResource(i, NULL);
}
@ -1283,7 +1292,8 @@ void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
{
ID3D11ShaderResourceView* srv = NULL;
if(sr) srv = *(GSTexture11*)sr;
if (sr)
srv = *(GSTexture11*)sr;
PSSetShaderResourceView(i, srv, sr);
}
@ -1292,7 +1302,7 @@ void GSDevice11::PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv, G
{
ASSERT(i < (int)m_state.ps_sr_views.size());
if(m_state.ps_sr_views[i] != srv)
if (m_state.ps_sr_views[i] != srv)
{
m_state.ps_sr_views[i] = srv;
m_state.ps_sr_texture[i] = (GSTexture11*)sr;
@ -1302,7 +1312,7 @@ void GSDevice11::PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv, G
void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1)
{
if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1)
if (m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1)
{
m_state.ps_ss[0] = ss0;
m_state.ps_ss[1] = ss1;
@ -1311,14 +1321,14 @@ void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState*
void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
{
if(m_state.ps != ps)
if (m_state.ps != ps)
{
m_state.ps = ps;
m_ctx->PSSetShader(ps, NULL, 0);
}
if(m_state.ps_cb != ps_cb)
if (m_state.ps_cb != ps_cb)
{
m_state.ps_cb = ps_cb;
@ -1334,7 +1344,7 @@ void GSDevice11::PSUpdateShaderState()
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
{
if(m_state.dss != dss || m_state.sref != sref)
if (m_state.dss != dss || m_state.sref != sref)
{
m_state.dss = dss;
m_state.sref = sref;
@ -1345,7 +1355,7 @@ void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref
void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, float bf)
{
if(m_state.bs != bs || m_state.bf != bf)
if (m_state.bs != bs || m_state.bf != bf)
{
m_state.bs = bs;
m_state.bf = bf;
@ -1364,10 +1374,10 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
if (!rt && !ds)
throw GSDXRecoverableError();
if(rt) rtv = *(GSTexture11*)rt;
if(ds) dsv = *(GSTexture11*)ds;
if (rt) rtv = *(GSTexture11*)rt;
if (ds) dsv = *(GSTexture11*)ds;
if(m_state.rt_view != rtv || m_state.dsv != dsv)
if (m_state.rt_view != rtv || m_state.dsv != dsv)
{
m_state.rt_view = rtv;
m_state.rt_texture = static_cast<GSTexture11*>(rt);
@ -1378,7 +1388,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
}
GSVector2i size = rt ? rt->GetSize() : ds->GetSize();
if(m_state.viewport != size)
if (m_state.viewport != size)
{
m_state.viewport = size;
@ -1397,7 +1407,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();
if(!m_state.scissor.eq(r))
if (!m_state.scissor.eq(r))
{
m_state.scissor = r;
@ -1436,14 +1446,14 @@ void GSDevice11::CreateShader(const std::vector<char>& source, const char* fn, I
hr = m_dev->CreateVertexShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, vs);
if(FAILED(hr))
if (FAILED(hr))
{
throw GSDXRecoverableError();
}
hr = m_dev->CreateInputLayout(layout, count, shader->GetBufferPointer(), shader->GetBufferSize(), il);
if(FAILED(hr))
if (FAILED(hr))
{
throw GSDXRecoverableError();
}
@ -1459,7 +1469,7 @@ void GSDevice11::CreateShader(const std::vector<char>& source, const char* fn, I
hr = m_dev->CreateGeometryShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, gs);
if(FAILED(hr))
if (FAILED(hr))
{
throw GSDXRecoverableError();
}
@ -1475,7 +1485,7 @@ void GSDevice11::CreateShader(const std::vector<char>& source, const char* fn, I
hr = m_dev->CreatePixelShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, ps);
if(FAILED(hr))
if (FAILED(hr))
{
throw GSDXRecoverableError();
}
@ -1494,8 +1504,7 @@ void GSDevice11::CompileShader(const std::vector<char>& source, const char* fn,
const HRESULT hr = D3DCompile(
source.data(), source.size(), fn, macro,
include, entry, shader_model.c_str(),
flags, 0, shader, &error
);
flags, 0, shader, &error);
if (error)
fprintf(stderr, "%s\n", (const char*)error->GetBufferPointer());
@ -1508,25 +1517,25 @@ uint16 GSDevice11::ConvertBlendEnum(uint16 generic)
{
// Translates a renderer-generic blend enumerator into the matching
// D3D11_BLEND_* (blend factor) or D3D11_BLEND_OP_* (blend operation)
// constant and returns it as a uint16.
// The case list appears twice below: first with the spacing used before
// the reformat, then with the reformatted spacing. Both runs encode the
// same mapping.
switch (generic)
{
case SRC_COLOR : return D3D11_BLEND_SRC_COLOR;
case INV_SRC_COLOR : return D3D11_BLEND_INV_SRC_COLOR;
// Generic "DST" names map onto D3D11's "DEST" spelling.
case DST_COLOR : return D3D11_BLEND_DEST_COLOR;
case INV_DST_COLOR : return D3D11_BLEND_INV_DEST_COLOR;
case SRC1_COLOR : return D3D11_BLEND_SRC1_COLOR;
case INV_SRC1_COLOR : return D3D11_BLEND_INV_SRC1_COLOR;
case SRC_ALPHA : return D3D11_BLEND_SRC_ALPHA;
case INV_SRC_ALPHA : return D3D11_BLEND_INV_SRC_ALPHA;
case DST_ALPHA : return D3D11_BLEND_DEST_ALPHA;
case INV_DST_ALPHA : return D3D11_BLEND_INV_DEST_ALPHA;
case SRC1_ALPHA : return D3D11_BLEND_SRC1_ALPHA;
case INV_SRC1_ALPHA : return D3D11_BLEND_INV_SRC1_ALPHA;
// The generic constant color is expressed through D3D11's blend factor.
case CONST_COLOR : return D3D11_BLEND_BLEND_FACTOR;
case INV_CONST_COLOR : return D3D11_BLEND_INV_BLEND_FACTOR;
case CONST_ONE : return D3D11_BLEND_ONE;
case CONST_ZERO : return D3D11_BLEND_ZERO;
// Blend operations (as opposed to blend factors) start here.
case OP_ADD : return D3D11_BLEND_OP_ADD;
case OP_SUBTRACT : return D3D11_BLEND_OP_SUBTRACT;
case OP_REV_SUBTRACT : return D3D11_BLEND_OP_REV_SUBTRACT;
// Any value not listed above trips ASSERT(0) and then yields 0.
default : ASSERT(0); return 0;
case SRC_COLOR: return D3D11_BLEND_SRC_COLOR;
case INV_SRC_COLOR: return D3D11_BLEND_INV_SRC_COLOR;
case DST_COLOR: return D3D11_BLEND_DEST_COLOR;
case INV_DST_COLOR: return D3D11_BLEND_INV_DEST_COLOR;
case SRC1_COLOR: return D3D11_BLEND_SRC1_COLOR;
case INV_SRC1_COLOR: return D3D11_BLEND_INV_SRC1_COLOR;
case SRC_ALPHA: return D3D11_BLEND_SRC_ALPHA;
case INV_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA;
case DST_ALPHA: return D3D11_BLEND_DEST_ALPHA;
case INV_DST_ALPHA: return D3D11_BLEND_INV_DEST_ALPHA;
case SRC1_ALPHA: return D3D11_BLEND_SRC1_ALPHA;
case INV_SRC1_ALPHA: return D3D11_BLEND_INV_SRC1_ALPHA;
case CONST_COLOR: return D3D11_BLEND_BLEND_FACTOR;
case INV_CONST_COLOR: return D3D11_BLEND_INV_BLEND_FACTOR;
case CONST_ONE: return D3D11_BLEND_ONE;
case CONST_ZERO: return D3D11_BLEND_ZERO;
case OP_ADD: return D3D11_BLEND_OP_ADD;
case OP_SUBTRACT: return D3D11_BLEND_OP_SUBTRACT;
case OP_REV_SUBTRACT: return D3D11_BLEND_OP_REV_SUBTRACT;
default: ASSERT(0); return 0;
}
}

View File

@ -34,7 +34,7 @@ struct GSVertexShader11
class GSDevice11 final : public GSDevice
{
public:
#pragma pack(push, 1)
#pragma pack(push, 1)
struct alignas(32) VSConstantBuffer
{
@ -58,7 +58,7 @@ public:
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue())
if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue())
{
a[0] = b[0];
a[1] = b[1];
@ -78,19 +78,25 @@ public:
{
struct
{
uint32 tme:1;
uint32 fst:1;
uint32 tme : 1;
uint32 fst : 1;
uint32 _free:30;
uint32 _free : 30;
};
uint32 key;
};
operator uint32() const {return key;}
operator uint32() const { return key; }
VSSelector() : key(0) {}
VSSelector(uint32 k) : key(k) {}
VSSelector()
: key(0)
{
}
VSSelector(uint32 k)
: key(k)
{
}
};
struct alignas(32) PSConstantBuffer
@ -131,7 +137,7 @@ public:
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
if(!((a[0] == b[0]) /*& (a[1] == b1)*/ & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5]) &
if (!((a[0] == b[0]) /*& (a[1] == b1)*/ & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5]) &
(a[6] == b[6]) & (a[7] == b[7]) & (a[9] == b[9]) & // if WH matches HalfTexel does too
(a[10] == b[10]) & (a[11] == b[11]) & (a[12] == b[12]) & (a[13] == b[13])).alltrue())
{
@ -178,22 +184,28 @@ public:
{
struct
{
uint32 iip:1;
uint32 prim:2;
uint32 point:1;
uint32 line:1;
uint32 cpu_sprite:1;
uint32 iip : 1;
uint32 prim : 2;
uint32 point : 1;
uint32 line : 1;
uint32 cpu_sprite : 1;
uint32 _free:26;
uint32 _free : 26;
};
uint32 key;
};
operator uint32() {return key;}
operator uint32() { return key; }
GSSelector() : key(0) {}
GSSelector(uint32 k) : key(k) {}
GSSelector()
: key(0)
{
}
GSSelector(uint32 k)
: key(k)
{
}
};
struct PSSelector
@ -204,63 +216,66 @@ public:
{
// *** Word 1
// Format
uint32 fmt:4;
uint32 dfmt:2;
uint32 depth_fmt:2;
uint32 fmt : 4;
uint32 dfmt : 2;
uint32 depth_fmt : 2;
// Alpha extension/Correction
uint32 aem:1;
uint32 fba:1;
uint32 aem : 1;
uint32 fba : 1;
// Fog
uint32 fog:1;
uint32 fog : 1;
// Pixel test
uint32 atst:3;
uint32 atst : 3;
// Color sampling
uint32 fst:1;
uint32 tfx:3;
uint32 tcc:1;
uint32 wms:2;
uint32 wmt:2;
uint32 ltf:1;
uint32 fst : 1;
uint32 tfx : 3;
uint32 tcc : 1;
uint32 wms : 2;
uint32 wmt : 2;
uint32 ltf : 1;
// Shuffle and fbmask effect
uint32 shuffle:1;
uint32 read_ba:1;
uint32 fbmask:1;
uint32 shuffle : 1;
uint32 read_ba : 1;
uint32 fbmask : 1;
// Blend and Colclip
uint32 hdr:1;
uint32 blend_a:2;
uint32 blend_b:2; // bit30/31
uint32 blend_c:2; // bit0
uint32 blend_d:2;
uint32 clr1:1;
uint32 colclip:1;
uint32 pabe:1;
uint32 hdr : 1;
uint32 blend_a : 2;
uint32 blend_b : 2; // bit30/31
uint32 blend_c : 2; // bit0
uint32 blend_d : 2;
uint32 clr1 : 1;
uint32 colclip : 1;
uint32 pabe : 1;
// Others ways to fetch the texture
uint32 channel:3;
uint32 channel : 3;
// Dithering
uint32 dither:2;
uint32 dither : 2;
// Depth clamp
uint32 zclamp:1;
uint32 zclamp : 1;
// Hack
uint32 tcoffsethack:1;
uint32 urban_chaos_hle:1;
uint32 tales_of_abyss_hle:1;
uint32 point_sampler:1;
uint32 invalid_tex0:1; // Lupin the 3rd
uint32 tcoffsethack : 1;
uint32 urban_chaos_hle : 1;
uint32 tales_of_abyss_hle : 1;
uint32 point_sampler : 1;
uint32 invalid_tex0 : 1; // Lupin the 3rd
uint32 _free:14;
uint32 _free : 14;
};
uint64 key;
};
operator uint64() {return key;}
operator uint64() { return key; }
PSSelector() : key(0) {}
PSSelector()
: key(0)
{
}
};
struct PSSamplerSelector
@ -269,17 +284,20 @@ public:
{
struct
{
uint32 tau:1;
uint32 tav:1;
uint32 ltf:1;
uint32 tau : 1;
uint32 tav : 1;
uint32 ltf : 1;
};
uint32 key;
};
operator uint32() {return key & 0x7;}
operator uint32() { return key & 0x7; }
PSSamplerSelector() : key(0) {}
PSSamplerSelector()
: key(0)
{
}
};
struct OMDepthStencilSelector
@ -288,19 +306,22 @@ public:
{
struct
{
uint32 ztst:2;
uint32 zwe:1;
uint32 date:1;
uint32 fba:1;
uint32 date_one:1;
uint32 ztst : 2;
uint32 zwe : 1;
uint32 date : 1;
uint32 fba : 1;
uint32 date_one : 1;
};
uint32 key;
};
operator uint32() {return key & 0x3f;}
operator uint32() { return key & 0x3f; }
OMDepthStencilSelector() : key(0) {}
OMDepthStencilSelector()
: key(0)
{
}
};
struct OMBlendSelector
@ -310,44 +331,55 @@ public:
struct
{
// Color mask
uint32 wr:1;
uint32 wg:1;
uint32 wb:1;
uint32 wa:1;
uint32 wr : 1;
uint32 wg : 1;
uint32 wb : 1;
uint32 wa : 1;
// Alpha blending
uint32 blend_index:7;
uint32 abe:1;
uint32 accu_blend:1;
uint32 blend_index : 7;
uint32 abe : 1;
uint32 accu_blend : 1;
};
struct
{
// Color mask
uint32 wrgba:4;
uint32 wrgba : 4;
};
uint32 key;
};
operator uint32() {return key & 0x1fff;}
operator uint32() { return key & 0x1fff; }
OMBlendSelector() : key(0) {}
OMBlendSelector()
: key(0)
{
}
};
#pragma pack(pop)
#pragma pack(pop)
class ShaderMacro
{
// Pair of C-string pointers: a macro name and its definition.
// Only the two pointers are stored; the constructor does not copy the
// characters they point to.
struct mcstr
{
const char* name, * def;
mcstr(const char* n, const char* d) : name(n), def(d) {}
const char *name, *def;
mcstr(const char* n, const char* d)
: name(n)
, def(d)
{
}
};
// Macro name/definition pair held as std::string values, so this struct
// keeps its own copies of both strings.
// NOTE(review): d is received by value and then copy-constructed into def;
// initializing with def(std::move(d)) would avoid the second copy.
struct mstring
{
std::string name, def;
mstring(const char* n, std::string d) : name(n), def(d) {}
mstring(const char* n, std::string d)
: name(n)
, def(d)
{
}
};
std::vector<mstring> mlist;
@ -494,7 +526,12 @@ private:
std::unique_ptr<GSTexture> m_font;
protected:
// Shader configuration kept by the device.
// level is the value handed back by GetFeatureLevel(); model is the string
// passed to the ShaderMacro constructor in SetupVS/SetupPS.
// vs/gs/ps/cs are presumably the per-stage shader profile strings — TODO confirm.
struct {D3D_FEATURE_LEVEL level; std::string model, vs, gs, ps, cs;} m_shader;
struct
{
D3D_FEATURE_LEVEL level;
std::string model, vs, gs, ps, cs;
} m_shader;
public:
GSDevice11();
virtual ~GSDevice11() {}
@ -502,7 +539,7 @@ public:
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const { level = m_shader.level; }
bool Create(const std::shared_ptr<GSWnd> &wnd);
bool Create(const std::shared_ptr<GSWnd>& wnd);
bool Reset(int w, int h);
void Flip();
void SetVSync(int vsync) final;
@ -558,9 +595,9 @@ public:
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
ID3D11Device* operator->() {return m_dev;}
operator ID3D11Device*() {return m_dev;}
operator ID3D11DeviceContext*() {return m_ctx;}
ID3D11Device* operator->() { return m_dev; }
operator ID3D11Device*() { return m_dev; }
operator ID3D11DeviceContext*() { return m_ctx; }
void CreateShader(const std::vector<char>& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il);
void CreateShader(const std::vector<char>& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
@ -568,4 +605,3 @@ public:
void CompileShader(const std::vector<char>& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3DBlob** shader, std::string shader_model);
};

View File

@ -45,49 +45,49 @@ void GSRendererDX11::SetupIA(const float& sx, const float& sy)
switch (m_vt.m_primclass)
{
case GS_POINT_CLASS:
if (unscale_pt_ln)
{
m_gs_sel.point = 1;
gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy);
}
case GS_POINT_CLASS:
if (unscale_pt_ln)
{
m_gs_sel.point = 1;
gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy);
}
t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
break;
t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
break;
case GS_LINE_CLASS:
if (unscale_pt_ln)
{
m_gs_sel.line = 1;
gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy);
}
case GS_LINE_CLASS:
if (unscale_pt_ln)
{
m_gs_sel.line = 1;
gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy);
}
t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
break;
case GS_SPRITE_CLASS:
// Lines: GPU conversion.
// Triangles: CPU conversion.
if (!m_vt.m_accurate_stq && m_vertex.next > 32) // <=> 16 sprites (based on Shadow Hearts)
{
t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
}
else
{
m_gs_sel.cpu_sprite = 1;
Lines2Sprites();
break;
case GS_SPRITE_CLASS:
// Lines: GPU conversion.
// Triangles: CPU conversion.
if (!m_vt.m_accurate_stq && m_vertex.next > 32) // <=> 16 sprites (based on Shadow Hearts)
{
t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
}
else
{
m_gs_sel.cpu_sprite = 1;
Lines2Sprites();
t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
}
break;
case GS_TRIANGLE_CLASS:
t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
}
break;
break;
case GS_TRIANGLE_CLASS:
t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
break;
default:
__assume(0);
default:
__assume(0);
}
void* ptr = NULL;
@ -102,7 +102,8 @@ void GSRendererDX11::SetupIA(const float& sx, const float& sy)
for (unsigned int i = 0; i < m_vertex.next; i++)
{
if (PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF;
if (PRIM->TME && PRIM->FST)
d[i].UV &= 0x3FEF3FEF;
}
}
@ -401,7 +402,6 @@ void GSRendererDX11::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:
// fprintf(stderr, "%d: Green channel (wrong mask) (fbmask %x)\n", s_n, m_context->FRAME.FBMSK >> 24);
m_ps_sel.channel = ChannelFetch_GREEN;
}
}
else if (green)
{
@ -452,7 +452,7 @@ void GSRendererDX11::EmulateBlending()
{
// Partial port of OGL SW blending. Currently only works for accumulation and non recursive blend.
const GIFRegALPHA& ALPHA = m_context->ALPHA;
bool sw_blending = false;
bool sw_blending = false;
// No blending so early exit
if (!(PRIM->ABE || m_env.PABE.PABE || (PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS)))
@ -491,7 +491,8 @@ void GSRendererDX11::EmulateBlending()
case ACC_BLEND_BASIC_D3D11:
sw_blending |= accumulation_blend || blend_non_recursive;
[[fallthrough]];
default: break;
default:
break;
}
// Color clip
@ -532,7 +533,8 @@ void GSRendererDX11::EmulateBlending()
{
m_om_bsel.accu_blend = 1;
if (ALPHA.A == 2) {
if (ALPHA.A == 2)
{
// The blend unit does a reverse subtraction so it means
// the shader must output a positive value.
// Replace 0 - Cs by Cs - 0
@ -568,8 +570,8 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex)
{
// Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
//const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
const uint8 wms = m_context->CLAMP.WMS;
const uint8 wmt = m_context->CLAMP.WMT;
@ -619,7 +621,6 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex)
GSVector4 half_offset = RealignTargetTextureCoordinate(tex);
vs_cb.Texture_Scale_Offset.z = half_offset.x;
vs_cb.Texture_Scale_Offset.w = half_offset.y;
}
else if (tex->m_target)
{
@ -684,7 +685,6 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex)
// Note 4 bits indexes are converted to 8 bits
m_ps_sel.fmt = 3 << 2;
}
else
{
@ -930,7 +930,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou
m_ps_sel.fba = m_context->FBA.FBA;
m_ps_sel.dither = m_dithering > 0 && m_ps_sel.dfmt == 2 && m_env.DTHE.DTHE;
if(m_ps_sel.dither)
if (m_ps_sel.dither)
{
m_ps_sel.dither = m_dithering;
ps_cb.DitherMatrix[0] = GSVector4(m_env.DIMX.DM00, m_env.DIMX.DM10, m_env.DIMX.DM20, m_env.DIMX.DM30);
@ -1091,7 +1091,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou
bool b = m_om_bsel.wb;
bool a = m_om_bsel.wa;
switch(m_context->TEST.AFAIL)
switch (m_context->TEST.AFAIL)
{
case AFAIL_KEEP: z = r = g = b = a = false; break; // none
case AFAIL_FB_ONLY: z = false; break; // rgba

View File

@ -27,7 +27,8 @@
class GSRendererDX11 final : public GSRendererHW
{
enum ACC_BLEND_D3D11 {
enum ACC_BLEND_D3D11
{
ACC_BLEND_NONE_D3D11 = 0,
ACC_BLEND_BASIC_D3D11 = 1,
ACC_BLEND_MEDIUM_D3D11 = 2,

View File

@ -36,10 +36,14 @@ GSTexture11::GSTexture11(ID3D11Texture2D* texture)
m_size.x = (int)m_desc.Width;
m_size.y = (int)m_desc.Height;
if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) m_type = RenderTarget;
else if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) m_type = DepthStencil;
else if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) m_type = Texture;
else if(m_desc.Usage == D3D11_USAGE_STAGING) m_type = Offscreen;
if (m_desc.BindFlags & D3D11_BIND_RENDER_TARGET)
m_type = RenderTarget;
else if (m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL)
m_type = DepthStencil;
else if (m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE)
m_type = Texture;
else if (m_desc.Usage == D3D11_USAGE_STAGING)
m_type = Offscreen;
m_format = (int)m_desc.Format;
@ -48,12 +52,12 @@ GSTexture11::GSTexture11(ID3D11Texture2D* texture)
bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch, int layer)
{
if(layer >= m_max_layer)
if (layer >= m_max_layer)
return true;
if(m_dev && m_texture)
if (m_dev && m_texture)
{
D3D11_BOX box = { (UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U };
D3D11_BOX box = {(UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U};
UINT subresource = layer; // MipSlice + (ArraySlice * MipLevels).
m_ctx->UpdateSubresource(m_texture, subresource, &box, data, pitch, 0);
@ -66,21 +70,21 @@ bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch, int l
bool GSTexture11::Map(GSMap& m, const GSVector4i* r, int layer)
{
if(r != NULL)
if (r != NULL)
{
// ASSERT(0); // not implemented
return false;
}
if(layer >= m_max_layer)
if (layer >= m_max_layer)
return false;
if(m_texture && m_desc.Usage == D3D11_USAGE_STAGING)
if (m_texture && m_desc.Usage == D3D11_USAGE_STAGING)
{
D3D11_MAPPED_SUBRESOURCE map;
UINT subresource = layer;
if(SUCCEEDED(m_ctx->Map(m_texture, subresource, D3D11_MAP_READ_WRITE, 0, &map)))
if (SUCCEEDED(m_ctx->Map(m_texture, subresource, D3D11_MAP_READ_WRITE, 0, &map)))
{
m.bits = (uint8*)map.pData;
m.pitch = (int)map.RowPitch;
@ -96,7 +100,7 @@ bool GSTexture11::Map(GSMap& m, const GSVector4i* r, int layer)
void GSTexture11::Unmap()
{
if(m_texture)
if (m_texture)
{
UINT subresource = m_layer;
m_ctx->Unmap(m_texture, subresource);
@ -156,7 +160,7 @@ bool GSTexture11::Save(const std::string& fn)
{
for (uint32 x = 0; x < desc.Width; x++)
{
reinterpret_cast<uint32*>(d)[x] = static_cast<uint32>(ldexpf(reinterpret_cast<float*>(s)[x*2], 32));
reinterpret_cast<uint32*>(d)[x] = static_cast<uint32>(ldexpf(reinterpret_cast<float*>(s)[x * 2], 32));
}
}
@ -175,14 +179,14 @@ bool GSTexture11::Save(const std::string& fn)
#endif
switch (desc.Format)
{
case DXGI_FORMAT_A8_UNORM:
format = GSPng::R8I_PNG;
break;
case DXGI_FORMAT_R8G8B8A8_UNORM:
break;
default:
fprintf(stderr, "DXGI_FORMAT %d not saved to image\n", desc.Format);
return false;
case DXGI_FORMAT_A8_UNORM:
format = GSPng::R8I_PNG;
break;
case DXGI_FORMAT_R8G8B8A8_UNORM:
break;
default:
fprintf(stderr, "DXGI_FORMAT %d not saved to image\n", desc.Format);
return false;
}
D3D11_MAPPED_SUBRESOURCE sm;
@ -207,9 +211,9 @@ GSTexture11::operator ID3D11Texture2D*()
GSTexture11::operator ID3D11ShaderResourceView*()
{
if(!m_srv && m_dev && m_texture)
if (!m_srv && m_dev && m_texture)
{
if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS)
if (m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS)
{
D3D11_SHADER_RESOURCE_VIEW_DESC srvd = {};
@ -232,7 +236,7 @@ GSTexture11::operator ID3D11RenderTargetView*()
{
ASSERT(m_dev);
if(!m_rtv && m_dev && m_texture)
if (!m_rtv && m_dev && m_texture)
{
m_dev->CreateRenderTargetView(m_texture, NULL, &m_rtv);
}
@ -242,9 +246,9 @@ GSTexture11::operator ID3D11RenderTargetView*()
GSTexture11::operator ID3D11DepthStencilView*()
{
if(!m_dsv && m_dev && m_texture)
if (!m_dsv && m_dev && m_texture)
{
if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS)
if (m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS)
{
D3D11_DEPTH_STENCIL_VIEW_DESC dsvd = {};

View File

@ -121,13 +121,15 @@ void GSTextureCache11::Read(Source* t, const GSVector4i& r)
const GIFRegTEX0& TEX0 = t->m_TEX0;
if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) {
if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height()))
{
m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r);
GSTexture::GSMap m;
GSVector4i r_offscreen(0, 0, r.width(), r.height());
if (offscreen->Map(m, &r_offscreen)) {
if (offscreen->Map(m, &r_offscreen))
{
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -27,7 +27,7 @@
class GSTextureCache11 : public GSTextureCache
{
protected:
int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;}
int Get8bitFormat() { return DXGI_FORMAT_A8_UNORM; }
void Read(Target* t, const GSVector4i& r);
void Read(Source* t, const GSVector4i& r);

View File

@ -38,7 +38,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateBuffer(&bd, NULL, &m_vs_cb);
if(FAILED(hr)) return false;
if (FAILED(hr))
return false;
memset(&bd, 0, sizeof(bd));
@ -48,7 +49,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateBuffer(&bd, NULL, &m_gs_cb);
if (FAILED(hr)) return false;
if (FAILED(hr))
return false;
memset(&bd, 0, sizeof(bd));
@ -58,7 +60,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateBuffer(&bd, NULL, &m_ps_cb);
if(FAILED(hr)) return false;
if (FAILED(hr))
return false;
D3D11_SAMPLER_DESC sd;
@ -75,7 +78,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateSamplerState(&sd, &m_palette_ss);
if(FAILED(hr)) return false;
if (FAILED(hr))
return false;
// create layout
@ -97,7 +101,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
{
auto i = std::as_const(m_vs).find(sel);
if(i == m_vs.end())
if (i == m_vs.end())
{
ShaderMacro sm(m_shader.model);
@ -126,7 +130,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
i = m_vs.find(sel);
}
if(m_vs_cb_cache.Update(cb))
if (m_vs_cb_cache.Update(cb))
{
ID3D11DeviceContext* ctx = m_ctx;
@ -184,7 +188,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
{
auto i = std::as_const(m_ps).find(sel);
if(i == m_ps.end())
if (i == m_ps.end())
{
ShaderMacro sm(m_shader.model);
@ -234,7 +238,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
i = m_ps.find(sel);
}
if(m_ps_cb_cache.Update(cb))
if (m_ps_cb_cache.Update(cb))
{
ID3D11DeviceContext* ctx = m_ctx;
@ -243,16 +247,16 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
CComPtr<ID3D11SamplerState> ss0, ss1;
if(sel.tfx != 4)
if (sel.tfx != 4)
{
if(!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3))
if (!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3))
{
ssel.ltf = 0;
}
auto i = std::as_const(m_ps_ss).find(ssel);
if(i != m_ps_ss.end())
if (i != m_ps_ss.end())
{
ss0 = i->second;
}
@ -278,7 +282,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
m_ps_ss[ssel] = ss0;
}
if(sel.fmt >= 3)
if (sel.fmt >= 3)
{
ss1 = m_palette_ss;
}
@ -293,13 +297,13 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin
{
auto i = std::as_const(m_om_dss).find(dssel);
if(i == m_om_dss.end())
if (i == m_om_dss.end())
{
D3D11_DEPTH_STENCIL_DESC dsd;
memset(&dsd, 0, sizeof(dsd));
if(dssel.date)
if (dssel.date)
{
dsd.StencilEnable = true;
dsd.StencilReadMask = 1;
@ -314,7 +318,7 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin
dsd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;
}
if(dssel.ztst != ZTST_ALWAYS || dssel.zwe)
if (dssel.ztst != ZTST_ALWAYS || dssel.zwe)
{
static const D3D11_COMPARISON_FUNC ztst[] =
{
@ -342,7 +346,7 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin
auto j = std::as_const(m_om_bs).find(bsel);
if(j == m_om_bs.end())
if (j == m_om_bs.end())
{
D3D11_BLEND_DESC bd;
@ -350,7 +354,7 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin
bd.RenderTarget[0].BlendEnable = bsel.abe;
if(bsel.abe)
if (bsel.abe)
{
HWBlend blend = GetBlend(bsel.blend_index);
bd.RenderTarget[0].BlendOp = (D3D11_BLEND_OP)blend.op;
@ -367,10 +371,10 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin
}
}
if(bsel.wr) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_RED;
if(bsel.wg) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_GREEN;
if(bsel.wb) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_BLUE;
if(bsel.wa) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_ALPHA;
if (bsel.wr) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_RED;
if (bsel.wg) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_GREEN;
if (bsel.wb) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_BLUE;
if (bsel.wa) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_ALPHA;
CComPtr<ID3D11BlendState> bs;

File diff suppressed because it is too large Load Diff

View File

@ -43,7 +43,7 @@ private:
static const float SSR_UV_TOLERANCE;
#pragma region hacks
#pragma region hacks
typedef bool (GSRendererHW::*OI_Ptr)(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
typedef void (GSRendererHW::*OO_Ptr)();
@ -73,7 +73,8 @@ private:
class Hacks
{
template<class T> class HackEntry
template <class T>
class HackEntry
{
public:
CRC::Title title;
@ -88,18 +89,19 @@ private:
}
};
template<class T> class FunctionMap : public GSFunctionMap<uint32, T>
template <class T>
class FunctionMap : public GSFunctionMap<uint32, T>
{
std::list<HackEntry<T> >& m_tbl;
std::list<HackEntry<T>>& m_tbl;
T GetDefaultFunction(uint32 key)
{
CRC::Title title = (CRC::Title)(key & 0xffffff);
CRC::Region region = (CRC::Region)(key >> 24);
for(const auto &entry : m_tbl)
for (const auto& entry : m_tbl)
{
if(entry.title == title && (entry.region == CRC::RegionCount || entry.region == region))
if (entry.title == title && (entry.region == CRC::RegionCount || entry.region == region))
{
return entry.func;
}
@ -109,12 +111,15 @@ private:
}
public:
FunctionMap(std::list<HackEntry<T> >& tbl) : m_tbl(tbl) {}
FunctionMap(std::list<HackEntry<T>>& tbl)
: m_tbl(tbl)
{
}
};
std::list<HackEntry<OI_Ptr> > m_oi_list;
std::list<HackEntry<OO_Ptr> > m_oo_list;
std::list<HackEntry<CU_Ptr> > m_cu_list;
std::list<HackEntry<OI_Ptr>> m_oi_list;
std::list<HackEntry<OO_Ptr>> m_oo_list;
std::list<HackEntry<CU_Ptr>> m_cu_list;
FunctionMap<OI_Ptr> m_oi_map;
FunctionMap<OO_Ptr> m_oo_map;
@ -131,7 +136,7 @@ private:
} m_hacks;
#pragma endregion
#pragma endregion
uint16 Interpolate_UV(float alpha, int t0, int t1);
float alpha0(int L, int X0, int X1);
@ -139,7 +144,8 @@ private:
void SwSpriteRender();
bool CanUseSwSpriteRender(bool allow_64x64_sprite);
template <bool linear> void RoundSpriteOffset();
template <bool linear>
void RoundSpriteOffset();
protected:
GSTextureCache* m_tc;
@ -190,5 +196,5 @@ public:
void Draw();
// Called by the texture cache to know if current texture is useful
virtual bool IsDummyTexture() const { return false;}
virtual bool IsDummyTexture() const { return false; }
};

File diff suppressed because it is too large Load Diff

View File

@ -28,7 +28,11 @@
class GSTextureCache
{
public:
// Unnamed enum with implicit values: RenderTarget == 0, DepthStencil == 1.
// to_string() in this class compares its int argument against DepthStencil.
enum {RenderTarget, DepthStencil};
enum
{
RenderTarget,
DepthStencil
};
class Surface : public GSAlignedClass<32>
{
@ -43,7 +47,7 @@ public:
uint8* m_temp;
bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture
bool m_shared_texture;
uint32 m_end_block; // Hint of the surface area.
uint32 m_end_block; // Hint of the surface area.
public:
Surface(GSRenderer* r, uint8* temp);
@ -54,7 +58,8 @@ public:
bool Overlaps(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect);
};
// Lookup key made of a pointer to CLUT data plus a 16-bit pal value.
// The struct declares no destructor, so it does not release whatever clut
// points to. What pal counts or indexes is not visible here — TODO confirm.
struct PaletteKey {
struct PaletteKey
{
const uint32* clut;
uint16 pal;
};
@ -86,19 +91,25 @@ public:
void InitializeTexture();
};
// Hash functor for PaletteKey. Only the call operator is declared here; its
// definition is not part of this view.
// Presumably paired with PaletteKeyEqual in an unordered container — TODO confirm.
struct PaletteKeyHash {
struct PaletteKeyHash
{
// Calculate hash
std::size_t operator()(const PaletteKey &key) const;
std::size_t operator()(const PaletteKey& key) const;
};
// Equality functor for PaletteKey. Only the call operator is declared here;
// its definition is not part of this view.
struct PaletteKeyEqual {
struct PaletteKeyEqual
{
// Compare pal value and clut contents
bool operator()(const PaletteKey &lhs, const PaletteKey &rhs) const;
bool operator()(const PaletteKey& lhs, const PaletteKey& rhs) const;
};
class Source : public Surface
{
struct {GSVector4i* rect; uint32 count;} m_write;
struct
{
GSVector4i* rect;
uint32 count;
} m_write;
void Write(const GSVector4i& r, int layer);
void Flush(uint32 count, int layer);
@ -116,7 +127,7 @@ public:
// still be valid on future. However it ought to be good when the source is created
// so it can be used to access un-converted data for the current draw call.
GSTexture* m_from_target;
GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0
GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
std::array<uint16, MAX_PAGES> m_erase_it;
@ -178,7 +189,11 @@ public:
uint32 m_pages[16]; // bitmap of all pages
bool m_used;
// Default constructor: m_used starts false and the whole m_pages bitmap is
// zero-filled (sizeof(m_pages) covers the entire array).
SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));}
SourceMap()
: m_used(false)
{
memset(m_pages, 0, sizeof(m_pages));
}
void Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off);
void RemoveAll();
@ -249,7 +264,8 @@ public:
bool ShallSearchTextureInsideRt();
// Returns a label for a surface type: "Depth" when type == DepthStencil,
// and "Color" for every other value (not only RenderTarget).
// The result points at a string literal.
const char* to_string(int type) {
const char* to_string(int type)
{
return (type == DepthStencil) ? "Depth" : "Color";
}

View File

@ -34,7 +34,12 @@ struct alignas(32) GSVertexHW9
// t.z = union {struct {uint8 r, g, b, a;}; uint32 c0;};
// t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;}
// Copy-assignment: copies t and p from v, then returns *this.
// NOTE(review): v is a non-const lvalue reference, so a const GSVertexHW9 or
// a temporary cannot be used as the right-hand side through this overload.
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
GSVertexHW9& operator=(GSVertexHW9& v)
{
t = v.t;
p = v.p;
return *this;
}
};
#pragma pack(pop)

View File

@ -22,9 +22,9 @@
#include "stdafx.h"
#include "GSDeviceNull.h"
bool GSDeviceNull::Create(const std::shared_ptr<GSWnd> &wnd)
bool GSDeviceNull::Create(const std::shared_ptr<GSWnd>& wnd)
{
if(!GSDevice::Create(wnd))
if (!GSDevice::Create(wnd))
return false;
Reset(1, 1);
@ -41,4 +41,3 @@ GSTexture* GSDeviceNull::CreateSurface(int type, int w, int h, int format)
{
// Heap-allocates a GSTextureNull from the four arguments and returns the
// raw pointer. Who eventually deletes it is not visible here — TODO confirm.
return new GSTextureNull(type, w, h, format);
}

View File

@ -36,7 +36,6 @@ private:
public:
GSDeviceNull() {}
bool Create(const std::shared_ptr<GSWnd> &wnd);
bool Create(const std::shared_ptr<GSWnd>& wnd);
bool Reset(int w, int h);
};

View File

@ -28,7 +28,10 @@ class GSRendererNull : public GSRenderer
// GSVertexTrace subclass that adds nothing of its own: the only member is a
// constructor that forwards state to the base class.
class GSVertexTraceNull : public GSVertexTrace
{
public:
GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {}
GSVertexTraceNull(const GSState* state)
: GSVertexTrace(state)
{
}
};
protected:

View File

@ -25,17 +25,20 @@
// Texture type created by GSDeviceNull::CreateSurface. It carries a small
// descriptor (type, w, h, format); every data operation below is a stub.
class GSTextureNull : public GSTexture
{
struct {int type, w, h, format;} m_desc;
struct
{
int type, w, h, format;
} m_desc;
public:
GSTextureNull();
GSTextureNull(int type, int w, int h, int format);
// Accessors return the stored descriptor fields.
int GetType() const {return m_desc.type;}
int GetFormat() const {return m_desc.format;}
int GetType() const { return m_desc.type; }
int GetFormat() const { return m_desc.format; }
// Update ignores its arguments and reports success; Map always reports
// failure and leaves m untouched.
bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) {return true;}
bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) {return false;}
bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) { return true; }
bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) { return false; }
// Nothing to unmap, since Map never succeeds.
void Unmap() {}
// Save writes nothing and always returns false.
bool Save(const std::string& fn) {return false;}
bool Save(const std::string& fn) { return false; }
};

View File

@ -27,7 +27,7 @@
#define GL_BUFFER_0 (0)
#ifndef GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR
#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008
#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008
#endif
// FIX compilation issue with Mesa 10
@ -54,12 +54,12 @@
// Added in GL4.6. Code should be updated but driver support...
#ifndef GL_TEXTURE_MAX_ANISOTROPY_EXT
#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE
#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE
#endif
// Believe me or not, they forgot to add the interaction with DSA...
#ifndef GL_EXT_direct_state_access
typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
typedef void(APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
#endif
// ********************** End of the extra header ******************* //
@ -115,18 +115,20 @@ typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLin
// It should be done by ENABLE_GL_VERSION_1_4 but it conflicts with the old gl.h
#if defined(__unix__) || defined(__APPLE__)
extern PFNGLBLENDFUNCSEPARATEPROC glBlendFuncSeparate;
extern PFNGLBLENDFUNCSEPARATEPROC glBlendFuncSeparate;
#endif
extern PFNGLTEXTUREPAGECOMMITMENTEXTPROC glTexturePageCommitmentEXT;
extern PFNGLTEXTUREPAGECOMMITMENTEXTPROC glTexturePageCommitmentEXT;
#include "PFN_GLLOADER_HPP.h"
// Extension registry interface, keyed by extension name. Both functions are
// only declared here (extern); their definitions are not part of this view.
// Has() presumably reports the flag last stored by Set() — TODO confirm.
// Set() defaults v to true when the second argument is omitted.
namespace GLExtension {
namespace GLExtension
{
extern bool Has(const std::string& ext);
extern void Set(const std::string& ext, bool v = true);
}
} // namespace GLExtension
namespace GLLoader {
namespace GLLoader
{
void check_gl_requirements();
extern bool vendor_id_amd;
@ -145,4 +147,4 @@ namespace GLLoader {
extern bool found_compatible_GL_ARB_sparse_texture2;
extern bool found_compatible_sparse_depth;
}
} // namespace GLLoader

View File

@ -22,7 +22,8 @@
#include "stdafx.h"
#include "GLState.h"
namespace GLState {
namespace GLState
{
GLuint fbo;
GSVector2i viewport;
GSVector4i scissor;
@ -59,7 +60,8 @@ namespace GLState {
int64 available_vram;
void Clear() {
void Clear()
{
fbo = 0;
viewport = GSVector2i(0, 0);
scissor = GSVector4i(0, 0, 0, 0);
@ -100,4 +102,4 @@ namespace GLState {
// (256MB are reserved for PBO/IBO/VBO/UBO buffers)
available_vram = (4096u - 256u) * 1024u * 1024u;
}
}
} // namespace GLState

View File

@ -24,7 +24,8 @@
#include "GSdx.h"
#include "GSVector.h"
namespace GLState {
namespace GLState
{
extern GLuint fbo; // frame buffer object
extern GSVector2i viewport;
extern GSVector4i scissor;
@ -62,4 +63,4 @@ namespace GLState {
extern int64 available_vram;
extern void Clear();
}
} // namespace GLState

File diff suppressed because it is too large Load Diff

View File

@ -34,7 +34,8 @@ extern uint64 g_real_texture_upload_byte;
extern uint64 g_vertex_upload_byte;
#endif
class GSDepthStencilOGL {
class GSDepthStencilOGL
{
bool m_depth_enable;
GLenum m_depth_func;
bool m_depth_mask;
@ -44,8 +45,8 @@ class GSDepthStencilOGL {
GLenum m_stencil_spass_dpass_op;
public:
GSDepthStencilOGL() : m_depth_enable(false)
GSDepthStencilOGL()
: m_depth_enable(false)
, m_depth_func(GL_ALWAYS)
, m_depth_mask(0)
, m_stencil_enable(false)
@ -57,12 +58,21 @@ public:
// The setters below only record the requested state in this object; none of them
// issues a GL call. SetupDepth() / SetupStencil() later push the recorded values to GL.

// Record that the depth test should be enabled.
void EnableDepth() { m_depth_enable = true; }
// Record that the stencil test should be enabled.
void EnableStencil() { m_stencil_enable = true; }
// Record the depth comparison function (later passed to glDepthFunc) and the
// depth write mask (later passed to glDepthMask).
void SetDepth(GLenum func, bool mask) { m_depth_func = func; m_depth_mask = mask; }
// Record the stencil comparison function (later passed to glStencilFunc with ref 1 and mask 1)
// and the operation applied when both stencil and depth tests pass (third argument of glStencilOp).
void SetStencil(GLenum func, GLenum pass) { m_stencil_func = func; m_stencil_spass_dpass_op = pass; }
void SetDepth(GLenum func, bool mask)
{
m_depth_func = func;
m_depth_mask = mask;
}
void SetStencil(GLenum func, GLenum pass)
{
m_stencil_func = func;
m_stencil_spass_dpass_op = pass;
}
void SetupDepth()
{
if (GLState::depth != m_depth_enable) {
if (GLState::depth != m_depth_enable)
{
GLState::depth = m_depth_enable;
if (m_depth_enable)
glEnable(GL_DEPTH_TEST);
@ -70,12 +80,15 @@ public:
glDisable(GL_DEPTH_TEST);
}
if (m_depth_enable) {
if (GLState::depth_func != m_depth_func) {
if (m_depth_enable)
{
if (GLState::depth_func != m_depth_func)
{
GLState::depth_func = m_depth_func;
glDepthFunc(m_depth_func);
}
if (GLState::depth_mask != m_depth_mask) {
if (GLState::depth_mask != m_depth_mask)
{
GLState::depth_mask = m_depth_mask;
glDepthMask((GLboolean)m_depth_mask);
}
@ -84,7 +97,8 @@ public:
void SetupStencil()
{
if (GLState::stencil != m_stencil_enable) {
if (GLState::stencil != m_stencil_enable)
{
GLState::stencil = m_stencil_enable;
if (m_stencil_enable)
glEnable(GL_STENCIL_TEST);
@ -92,13 +106,16 @@ public:
glDisable(GL_STENCIL_TEST);
}
if (m_stencil_enable) {
if (m_stencil_enable)
{
// Note: here the mask controls which bitplane is considered by the operation
if (GLState::stencil_func != m_stencil_func) {
if (GLState::stencil_func != m_stencil_func)
{
GLState::stencil_func = m_stencil_func;
glStencilFunc(m_stencil_func, 1, 1);
}
if (GLState::stencil_pass != m_stencil_spass_dpass_op) {
if (GLState::stencil_pass != m_stencil_spass_dpass_op)
{
GLState::stencil_pass = m_stencil_spass_dpass_op;
glStencilOp(GL_KEEP, GL_KEEP, m_stencil_spass_dpass_op);
}
@ -133,7 +150,7 @@ public:
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2])).alltrue())
if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2])).alltrue())
{
a[0] = b[0];
a[1] = b[1];
@ -152,17 +169,23 @@ public:
{
struct
{
uint32 int_fst:1;
uint32 _free:31;
uint32 int_fst : 1;
uint32 _free : 31;
};
uint32 key;
};
operator uint32() const {return key;}
operator uint32() const { return key; }
VSSelector() : key(0) {}
VSSelector(uint32 k) : key(k) {}
VSSelector()
: key(0)
{
}
VSSelector(uint32 k)
: key(k)
{
}
};
struct GSSelector
@ -171,20 +194,26 @@ public:
{
struct
{
uint32 sprite:1;
uint32 point:1;
uint32 line:1;
uint32 sprite : 1;
uint32 point : 1;
uint32 line : 1;
uint32 _free:29;
uint32 _free : 29;
};
uint32 key;
};
operator uint32() const {return key;}
operator uint32() const { return key; }
GSSelector() : key(0) {}
GSSelector(uint32 k) : key(k) {}
GSSelector()
: key(0)
{
}
GSSelector(uint32 k)
: key(k)
{
}
};
struct alignas(32) PSConstantBuffer
@ -263,74 +292,77 @@ public:
{
// *** Word 1
// Format
uint32 tex_fmt:4;
uint32 dfmt:2;
uint32 depth_fmt:2;
uint32 tex_fmt : 4;
uint32 dfmt : 2;
uint32 depth_fmt : 2;
// Alpha extension/Correction
uint32 aem:1;
uint32 fba:1;
uint32 aem : 1;
uint32 fba : 1;
// Fog
uint32 fog:1;
uint32 fog : 1;
// Flat/Gouraud shading
uint32 iip:1;
uint32 iip : 1;
// Pixel test
uint32 date:3;
uint32 atst:3;
uint32 date : 3;
uint32 atst : 3;
// Color sampling
uint32 fst:1; // Investigate to do it on the VS
uint32 tfx:3;
uint32 tcc:1;
uint32 wms:2;
uint32 wmt:2;
uint32 ltf:1;
uint32 fst : 1; // Investigate to do it on the VS
uint32 tfx : 3;
uint32 tcc : 1;
uint32 wms : 2;
uint32 wmt : 2;
uint32 ltf : 1;
// Shuffle and fbmask effect
uint32 shuffle:1;
uint32 read_ba:1;
uint32 write_rg:1;
uint32 fbmask:1;
uint32 shuffle : 1;
uint32 read_ba : 1;
uint32 write_rg : 1;
uint32 fbmask : 1;
//uint32 _free1:0;
// *** Word 2
// Blend and Colclip
uint32 blend_a:2;
uint32 blend_b:2;
uint32 blend_c:2;
uint32 blend_d:2;
uint32 clr1:1; // useful?
uint32 hdr:1;
uint32 colclip:1;
uint32 pabe:1;
uint32 blend_a : 2;
uint32 blend_b : 2;
uint32 blend_c : 2;
uint32 blend_d : 2;
uint32 clr1 : 1; // useful?
uint32 hdr : 1;
uint32 colclip : 1;
uint32 pabe : 1;
// Other ways to fetch the texture
uint32 channel:3;
uint32 channel : 3;
// Dithering
uint32 dither:2;
uint32 dither : 2;
// Depth clamp
uint32 zclamp:1;
uint32 zclamp : 1;
// Hack
uint32 tcoffsethack:1;
uint32 urban_chaos_hle:1;
uint32 tales_of_abyss_hle:1;
uint32 tex_is_fb:1; // Jak Shadows
uint32 automatic_lod:1;
uint32 manual_lod:1;
uint32 point_sampler:1;
uint32 invalid_tex0:1; // Lupin the 3rd
uint32 tcoffsethack : 1;
uint32 urban_chaos_hle : 1;
uint32 tales_of_abyss_hle : 1;
uint32 tex_is_fb : 1; // Jak Shadows
uint32 automatic_lod : 1;
uint32 manual_lod : 1;
uint32 point_sampler : 1;
uint32 invalid_tex0 : 1; // Lupin the 3rd
uint32 _free2:6;
uint32 _free2 : 6;
};
uint64 key;
};
// FIXME is the & useful ?
operator uint64() const {return key;}
operator uint64() const { return key; }
PSSelector() : key(0) {}
PSSelector()
: key(0)
{
}
};
struct PSSamplerSelector
@ -339,22 +371,28 @@ public:
{
struct
{
uint32 tau:1;
uint32 tav:1;
uint32 biln:1;
uint32 triln:3;
uint32 aniso:1;
uint32 tau : 1;
uint32 tav : 1;
uint32 biln : 1;
uint32 triln : 3;
uint32 aniso : 1;
uint32 _free:25;
uint32 _free : 25;
};
uint32 key;
};
operator uint32() {return key;}
operator uint32() { return key; }
PSSamplerSelector() : key(0) {}
PSSamplerSelector(uint32 k) : key(k) {}
PSSamplerSelector()
: key(0)
{
}
PSSamplerSelector(uint32 k)
: key(k)
{
}
};
struct OMDepthStencilSelector
@ -363,22 +401,28 @@ public:
{
struct
{
uint32 ztst:2;
uint32 zwe:1;
uint32 date:1;
uint32 date_one:1;
uint32 ztst : 2;
uint32 zwe : 1;
uint32 date : 1;
uint32 date_one : 1;
uint32 _free:27;
uint32 _free : 27;
};
uint32 key;
};
// FIXME is the & useful ?
operator uint32() {return key;}
operator uint32() { return key; }
OMDepthStencilSelector() : key(0) {}
OMDepthStencilSelector(uint32 k) : key(k) {}
OMDepthStencilSelector()
: key(0)
{
}
OMDepthStencilSelector(uint32 k)
: key(k)
{
}
};
struct OMColorMaskSelector
@ -387,26 +431,29 @@ public:
{
struct
{
uint32 wr:1;
uint32 wg:1;
uint32 wb:1;
uint32 wa:1;
uint32 wr : 1;
uint32 wg : 1;
uint32 wb : 1;
uint32 wa : 1;
uint32 _free:28;
uint32 _free : 28;
};
struct
{
uint32 wrgba:4;
uint32 wrgba : 4;
};
uint32 key;
};
// FIXME is the & useful ?
// NOTE(review): it appears to be. The uint32 constructor below assigns only the 4-bit
// wrgba field, so the other 28 bits of key are not initialized by that constructor;
// masking with 0xf keeps them out of the converted value.
// Conversion to the 4-bit write mask (one bit each for wr, wg, wb, wa).
operator uint32() {return key & 0xf;}
operator uint32() { return key & 0xf; }

// Default: all four channel write bits set (key = 0xF).
OMColorMaskSelector() : key(0xF) {}
OMColorMaskSelector()
: key(0xF)
{
}
// Build from a channel mask; only the low 4 bits of c fit in wrgba.
OMColorMaskSelector(uint32 c) { wrgba = c; }
};
@ -416,7 +463,7 @@ public:
GSVector4i ChannelShuffle;
GSVector4i EMOD_AC;
// Zero-fills the entire object (sizeof(*this) bytes) on construction.
MiscConstantBuffer() {memset(this, 0, sizeof(*this));}
MiscConstantBuffer() { memset(this, 0, sizeof(*this)); }
};
static int m_shader_inst;
@ -436,61 +483,69 @@ private:
std::vector<char> m_shader_tfx_vgs;
std::vector<char> m_shader_tfx_fs;
GLuint m_fbo; // frame buffer container
GLuint m_fbo_read; // frame buffer container only for reading
GLuint m_fbo; // frame buffer container
GLuint m_fbo_read; // frame buffer container only for reading
GSVertexBufferStateOGL* m_va;// state of the vertex buffer/array
GSVertexBufferStateOGL* m_va; // state of the vertex buffer/array
struct {
GLuint ps[2]; // program object
GSUniformBufferOGL* cb; // uniform buffer object
struct
{
GLuint ps[2]; // program object
GSUniformBufferOGL* cb; // uniform buffer object
} m_merge_obj;
struct {
GLuint ps[4]; // program object
GSUniformBufferOGL* cb; // uniform buffer object
struct
{
GLuint ps[4]; // program object
GSUniformBufferOGL* cb; // uniform buffer object
} m_interlace;
struct {
GLuint vs; // program object
GLuint ps[ShaderConvert_Count]; // program object
GLuint ln; // sampler object
GLuint pt; // sampler object
struct
{
GLuint vs; // program object
GLuint ps[ShaderConvert_Count]; // program object
GLuint ln; // sampler object
GLuint pt; // sampler object
GSDepthStencilOGL* dss;
GSDepthStencilOGL* dss_write;
GSUniformBufferOGL* cb;
} m_convert;
struct {
struct
{
GLuint ps;
GSUniformBufferOGL *cb;
GSUniformBufferOGL* cb;
} m_fxaa;
struct {
struct
{
GLuint ps;
GSUniformBufferOGL* cb;
} m_shaderfx;
struct {
struct
{
GSDepthStencilOGL* dss;
GSTexture* t;
} m_date;
struct {
struct
{
GLuint ps;
} m_shadeboost;
// Timer-query bookkeeping for the profiler.
// timer_query holds 1 << 16 entries and last_query is a uint16, so every possible
// value of last_query is a valid index into the array.
struct {
struct
{
uint16 last_query;
GLuint timer_query[1<<16];
GLuint timer_query[1 << 16];
// Returns the query object stored at index last_query.
GLuint timer() { return timer_query[last_query]; }
} m_profiler;
GLuint m_vs[1<<1];
GLuint m_gs[1<<3];
GLuint m_ps_ss[1<<7];
GSDepthStencilOGL* m_om_dss[1<<5];
GLuint m_vs[1 << 1];
GLuint m_gs[1 << 3];
GLuint m_ps_ss[1 << 7];
GSDepthStencilOGL* m_om_dss[1 << 5];
std::unordered_map<uint64, GLuint> m_ps;
GLuint m_apitrace;
@ -530,9 +585,9 @@ public:
void GenerateProfilerData();
// Used by OpenGL, so the same calling convention is required.
static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar *gl_message, const void* userParam);
static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar* gl_message, const void* userParam);
bool Create(const std::shared_ptr<GSWnd> &wnd);
bool Create(const std::shared_ptr<GSWnd>& wnd);
bool Reset(int w, int h);
void Flip();
void SetVSync(int vsync);

File diff suppressed because it is too large Load Diff

View File

@ -27,13 +27,15 @@
class GSRendererOGL final : public GSRendererHW
{
enum PRIM_OVERLAP {
enum PRIM_OVERLAP
{
PRIM_OVERLAP_UNKNOW,
PRIM_OVERLAP_YES,
PRIM_OVERLAP_NO
};
enum ACC_BLEND {
enum ACC_BLEND
{
ACC_BLEND_NONE = 0,
ACC_BLEND_BASIC = 1,
ACC_BLEND_MEDIUM = 2,
@ -42,46 +44,46 @@ class GSRendererOGL final : public GSRendererHW
ACC_BLEND_ULTRA = 5
};
private:
PRIM_OVERLAP m_prim_overlap;
std::vector<size_t> m_drawlist;
private:
PRIM_OVERLAP m_prim_overlap;
std::vector<size_t> m_drawlist;
TriFiltering UserHacks_tri_filter;
TriFiltering UserHacks_tri_filter;
GSDeviceOGL::VSConstantBuffer vs_cb;
GSDeviceOGL::PSConstantBuffer ps_cb;
GSDeviceOGL::VSConstantBuffer vs_cb;
GSDeviceOGL::PSConstantBuffer ps_cb;
bool m_require_one_barrier;
bool m_require_full_barrier;
bool m_require_one_barrier;
bool m_require_full_barrier;
GSDeviceOGL::VSSelector m_vs_sel;
GSDeviceOGL::GSSelector m_gs_sel;
GSDeviceOGL::PSSelector m_ps_sel;
GSDeviceOGL::VSSelector m_vs_sel;
GSDeviceOGL::GSSelector m_gs_sel;
GSDeviceOGL::PSSelector m_ps_sel;
GSDeviceOGL::PSSamplerSelector m_ps_ssel;
GSDeviceOGL::OMColorMaskSelector m_om_csel;
GSDeviceOGL::OMDepthStencilSelector m_om_dssel;
GSDeviceOGL::PSSamplerSelector m_ps_ssel;
GSDeviceOGL::OMColorMaskSelector m_om_csel;
GSDeviceOGL::OMDepthStencilSelector m_om_dssel;
private:
inline void ResetStates();
inline void SetupIA(const float& sx, const float& sy);
inline void EmulateTextureShuffleAndFbmask();
inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex);
inline void EmulateBlending(bool& DATE_GL42, bool& DATE_GL45);
inline void EmulateTextureSampler(const GSTextureCache::Source* tex);
inline void EmulateZbuffer();
private:
inline void ResetStates();
inline void SetupIA(const float& sx, const float& sy);
inline void EmulateTextureShuffleAndFbmask();
inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex);
inline void EmulateBlending(bool& DATE_GL42, bool& DATE_GL45);
inline void EmulateTextureSampler(const GSTextureCache::Source* tex);
inline void EmulateZbuffer();
public:
GSRendererOGL();
virtual ~GSRendererOGL() {};
public:
GSRendererOGL();
virtual ~GSRendererOGL() {}
bool CreateDevice(GSDevice* dev);
bool CreateDevice(GSDevice* dev);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) final;
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) final;
PRIM_OVERLAP PrimitiveOverlap();
PRIM_OVERLAP PrimitiveOverlap();
void SendDraw();
void SendDraw();
bool IsDummyTexture() const final;
bool IsDummyTexture() const final;
};

View File

@ -29,9 +29,9 @@
#include "GSdxResources.h"
#endif
GSShaderOGL::GSShaderOGL(bool debug) :
m_pipeline(0),
m_debug_shader(debug)
GSShaderOGL::GSShaderOGL(bool debug)
: m_pipeline(0)
, m_debug_shader(debug)
{
theApp.LoadResource(IDR_COMMON_GLSL, m_common_header);
@ -43,10 +43,12 @@ GSShaderOGL::GSShaderOGL(bool debug) :
// Destructor: prints how many shaders, programs and pipelines are queued for deletion,
// then deletes all of them.
GSShaderOGL::~GSShaderOGL()
{
printf("Delete %zu Shaders, %zu Programs, %zu Pipelines\n",
m_shad_to_delete.size(), m_prog_to_delete.size(), m_pipe_to_delete.size());
m_shad_to_delete.size(), m_prog_to_delete.size(), m_pipe_to_delete.size());

// Shaders and programs are deleted one object per call.
for (auto s : m_shad_to_delete) glDeleteShader(s);
for (auto p : m_prog_to_delete) glDeleteProgram(p);
for (auto s : m_shad_to_delete)
glDeleteShader(s);
for (auto p : m_prog_to_delete)
glDeleteProgram(p);
// Pipelines are deleted in a single batched call.
// NOTE(review): &m_pipe_to_delete[0] indexes element 0 even when the container is empty.
// The container type is not visible here; if it is a std::vector this is undefined
// behaviour for an empty vector and .data() would be the safe spelling — confirm.
glDeleteProgramPipelines(m_pipe_to_delete.size(), &m_pipe_to_delete[0]);
}
@ -91,7 +93,8 @@ void GSShaderOGL::BindProgram(GLuint vs, GLuint gs, GLuint ps)
{
GLuint p = LinkProgram(vs, gs, ps);
if (GLState::program != p) {
if (GLState::program != p)
{
GLState::program = p;
glUseProgram(p);
}
@ -99,7 +102,8 @@ void GSShaderOGL::BindProgram(GLuint vs, GLuint gs, GLuint ps)
void GSShaderOGL::BindProgram(GLuint p)
{
if (GLState::program != p) {
if (GLState::program != p)
{
GLState::program = p;
glUseProgram(p);
}
@ -109,12 +113,14 @@ void GSShaderOGL::BindPipeline(GLuint vs, GLuint gs, GLuint ps)
{
BindPipeline(m_pipeline);
if (GLState::vs != vs) {
if (GLState::vs != vs)
{
GLState::vs = vs;
glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, vs);
}
if (GLState::gs != gs) {
if (GLState::gs != gs)
{
GLState::gs = gs;
glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, gs);
}
@ -133,12 +139,14 @@ void GSShaderOGL::BindPipeline(GLuint vs, GLuint gs, GLuint ps)
void GSShaderOGL::BindPipeline(GLuint pipe)
{
if (GLState::pipeline != pipe) {
if (GLState::pipeline != pipe)
{
GLState::pipeline = pipe;
glBindProgramPipeline(pipe);
}
if (GLState::program) {
if (GLState::program)
{
GLState::program = 0;
glUseProgram(0);
}
@ -146,15 +154,18 @@ void GSShaderOGL::BindPipeline(GLuint pipe)
bool GSShaderOGL::ValidateShader(GLuint s)
{
if (!m_debug_shader) return true;
if (!m_debug_shader)
return true;
GLint status = 0;
glGetShaderiv(s, GL_COMPILE_STATUS, &status);
if (status) return true;
if (status)
return true;
GLint log_length = 0;
glGetShaderiv(s, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
if (log_length > 0)
{
char* log = new char[log_length];
glGetShaderInfoLog(s, log_length, NULL, log);
fprintf(stderr, "%s", log);
@ -167,15 +178,18 @@ bool GSShaderOGL::ValidateShader(GLuint s)
bool GSShaderOGL::ValidateProgram(GLuint p)
{
if (!m_debug_shader) return true;
if (!m_debug_shader)
return true;
GLint status = 0;
glGetProgramiv(p, GL_LINK_STATUS, &status);
if (status) return true;
if (status)
return true;
GLint log_length = 0;
glGetProgramiv(p, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
if (log_length > 0)
{
char* log = new char[log_length];
glGetProgramInfoLog(p, log_length, NULL, log);
fprintf(stderr, "%s", log);
@ -188,18 +202,21 @@ bool GSShaderOGL::ValidateProgram(GLuint p)
bool GSShaderOGL::ValidatePipeline(GLuint p)
{
if (!m_debug_shader) return true;
if (!m_debug_shader)
return true;
// FIXME: might be mandatory to validate the pipeline
glValidateProgramPipeline(p);
GLint status = 0;
glGetProgramPipelineiv(p, GL_VALIDATE_STATUS, &status);
if (status) return true;
if (status)
return true;
GLint log_length = 0;
glGetProgramPipelineiv(p, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
if (log_length > 0)
{
char* log = new char[log_length];
glGetProgramPipelineInfoLog(p, log_length, NULL, log);
fprintf(stderr, "%s", log);
@ -218,10 +235,13 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
header += "#extension GL_ARB_shading_language_420pack: require\n";
// Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n";
if (GLLoader::found_GL_ARB_shader_image_load_store) {
if (GLLoader::found_GL_ARB_shader_image_load_store)
{
// Need GL version 420
header += "#extension GL_ARB_shader_image_load_store: require\n";
} else {
}
else
{
header += "#define DISABLE_GL42_image\n";
}
if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
@ -234,7 +254,8 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
header += "#define pGL_ES 0\n";
// Allows putting several shaders in one file
switch (type) {
switch (type)
{
case GL_VERTEX_SHADER:
header += "#define VERTEX_SHADER 1\n";
break;
@ -244,7 +265,8 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
case GL_FRAGMENT_SHADER:
header += "#define FRAGMENT_SHADER 1\n";
break;
default: ASSERT(0);
default:
ASSERT(0);
}
// Select the entry point ie the main function
@ -276,7 +298,8 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
bool status = ValidateProgram(program);
if (!status) {
if (!status)
{
// print extra info
fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), program);
fprintf(stderr, "\n%s", macro_sel.c_str());
@ -306,13 +329,14 @@ GLuint GSShaderOGL::CompileShader(const std::string& glsl_file, const std::strin
sources[1] = m_common_header.data();
sources[2] = glsl_h_code;
shader = glCreateShader(type);
shader = glCreateShader(type);
glShaderSource(shader, shader_nb, sources, NULL);
glCompileShader(shader);
bool status = ValidateShader(shader);
if (!status) {
if (!status)
{
// print extra info
fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), shader);
fprintf(stderr, "\n%s", macro_sel.c_str());
@ -331,12 +355,13 @@ GLuint GSShaderOGL::CompileShader(const std::string& glsl_file, const std::strin
// GLSL improvement (unfortunately).
int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
{
if (!GLLoader::vendor_id_nvidia) return 0;
if (!GLLoader::vendor_id_nvidia)
return 0;
GLint binaryLength;
GLint binaryLength;
glGetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
char* binary = new char[binaryLength+4];
char* binary = new char[binaryLength + 4];
GLenum binaryFormat;
glGetProgramBinary(p, binaryLength, NULL, &binaryFormat, binary);
@ -345,18 +370,24 @@ int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
// Search the magic number "!!"
int asm_ = 0;
while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) {
while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_ + 1] != '!'))
{
asm_ += 1;
}
int instructions = -1;
if (asm_ < binaryLength) {
if (asm_ < binaryLength)
{
// Now print asm as text
char* asm_txt = strtok(&binary[asm_], "\n");
while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) {
if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4)) {
while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5)))
{
if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4))
{
instructions = 0;
} else if (instructions >= 0) {
}
else if (instructions >= 0)
{
if (instructions == 0)
fprintf(outfile, "\n");
instructions++;
@ -369,7 +400,8 @@ int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
}
fclose(outfile);
if (instructions < 0) {
if (instructions < 0)
{
// RAW dump in case of error
fprintf(stderr, "Error: failed to find the number of instructions!\n");
outfile = fopen(file.c_str(), "wb");

View File

@ -21,7 +21,8 @@
#pragma once
class GSShaderOGL {
class GSShaderOGL
{
GLuint m_pipeline;
std::unordered_map<uint32, GLuint> m_program;
const bool m_debug_shader;
@ -37,7 +38,7 @@ class GSShaderOGL {
std::string GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro);
std::vector<char> m_common_header;
public:
public:
GSShaderOGL(bool debug);
~GSShaderOGL();

View File

@ -76,18 +76,18 @@ void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();
if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader))
if (GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader))
{
GSTexture::GSMap m;
GSVector4i r_offscreen(0, 0, r.width(), r.height());
if(offscreen->Map(m, &r_offscreen))
if (offscreen->Map(m, &r_offscreen))
{
// TODO: block level write
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
switch(TEX0.PSM)
switch (TEX0.PSM)
{
case PSM_PSMCT32:
case PSM_PSMZ32:
@ -123,13 +123,15 @@ void GSTextureCacheOGL::Read(Source* t, const GSVector4i& r)
// FIXME Create a get function to avoid the useless copy
// Note: With openGL 4.5 you can use glGetTextureSubImage
if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) {
if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height()))
{
m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r);
GSTexture::GSMap m;
GSVector4i r_offscreen(0, 0, r.width(), r.height());
if (offscreen->Map(m, &r_offscreen)) {
if (offscreen->Map(m, &r_offscreen))
{
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -28,7 +28,7 @@
class GSTextureCacheOGL final : public GSTextureCache
{
protected:
// Returns GL_R8 (single-channel, 8 bits per texel) as the format for 8-bit textures.
int Get8bitFormat() { return GL_R8;}
int Get8bitFormat() { return GL_R8; }
void Read(Target* t, const GSVector4i& r);
void Read(Source* t, const GSVector4i& r);

View File

@ -30,16 +30,17 @@ extern uint64 g_real_texture_upload_byte;
#endif
// FIXME OGL4: investigate, only 1 unpack buffer always bound
namespace PboPool {
namespace PboPool
{
const uint32 m_pbo_size = 64*1024*1024;
const uint32 m_seg_size = 16*1024*1024;
const uint32 m_pbo_size = 64 * 1024 * 1024;
const uint32 m_seg_size = 16 * 1024 * 1024;
GLuint m_buffer;
uptr m_offset;
char* m_map;
uptr m_offset;
char* m_map;
uint32 m_size;
GLsync m_fence[m_pbo_size/m_seg_size];
GLsync m_fence[m_pbo_size / m_seg_size];
// Option for buffer storage
// XXX: do I actually need coherent and barrier???
@ -49,7 +50,8 @@ namespace PboPool {
const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
void Init() {
void Init()
{
glGenBuffers(1, &m_buffer);
BindPbo();
@ -57,22 +59,25 @@ namespace PboPool {
glObjectLabel(GL_BUFFER, m_buffer, -1, "PBO");
glBufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags);
m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags);
m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags);
m_offset = 0;
for (size_t i = 0; i < countof(m_fence); i++) {
for (size_t i = 0; i < countof(m_fence); i++)
{
m_fence[i] = 0;
}
UnbindPbo();
}
char* Map(uint32 size) {
char* Map(uint32 size)
{
char* map;
// Note: keep offset aligned for SSE/AVX
m_size = (size + 63) & ~0x3F;
if (m_size > m_pbo_size) {
if (m_size > m_pbo_size)
{
fprintf(stderr, "BUG: PBO too small %u but need %u\n", m_pbo_size, m_size);
}
@ -87,41 +92,50 @@ namespace PboPool {
return map;
}
// Flushes the m_size bytes starting at m_offset of the buffer bound to GL_PIXEL_UNPACK_BUFFER.
// The buffer is mapped with GL_MAP_FLUSH_EXPLICIT_BIT (see map_flags), so written ranges
// have to be flushed explicitly like this.
void Unmap() {
void Unmap()
{
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset, m_size);
}
// Returns the current byte offset into the PBO (m_offset).
uptr Offset() {
uptr Offset()
{
return m_offset;
}
// Tears the pool down: clears the mapping pointer and offset, deletes every fence,
// then deletes the buffer object.
// NOTE(review): no explicit unmap call is visible here; deleting the buffer presumably
// releases the persistent mapping — confirm.
void Destroy() {
m_map = NULL;
void Destroy()
{
m_map = NULL;
m_offset = 0;

// Slots that never received a fence are still 0 (Init zeroes them);
// glDeleteSync silently ignores a sync value of zero.
for (size_t i = 0; i < countof(m_fence); i++) {
for (size_t i = 0; i < countof(m_fence); i++)
{
glDeleteSync(m_fence[i]);
}
glDeleteBuffers(1, &m_buffer);
}
// Binds the pool's buffer (m_buffer) to the GL_PIXEL_UNPACK_BUFFER target.
void BindPbo() {
void BindPbo()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer);
}
void Sync() {
void Sync()
{
uint32 segment_current = m_offset / m_seg_size;
uint32 segment_next = (m_offset + m_size) / m_seg_size;
uint32 segment_next = (m_offset + m_size) / m_seg_size;
if (segment_current != segment_next) {
if (segment_next >= countof(m_fence)) {
if (segment_current != segment_next)
{
if (segment_next >= countof(m_fence))
{
segment_next = 0;
}
// Align current transfer on the start of the segment
m_offset = m_seg_size * segment_next;
if (m_size > m_seg_size) {
if (m_size > m_seg_size)
{
fprintf(stderr, "BUG: PBO Map size %u is bigger than a single segment %u. Crossing more than one fence is not supported yet, texture data may be corrupted.\n", m_size, m_seg_size);
// TODO Synchronize all crossed fences
}
@ -130,10 +144,12 @@ namespace PboPool {
m_fence[segment_current] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
// Check next segment is free
if (m_fence[segment_next]) {
if (m_fence[segment_next])
{
GLenum status = glClientWaitSync(m_fence[segment_next], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
// Potentially it doesn't work on AMD driver which might always return GL_CONDITION_SATISFIED
if (status != GL_ALREADY_SIGNALED) {
if (status != GL_ALREADY_SIGNALED)
{
GL_PERF("GL_PIXEL_UNPACK_BUFFER: Sync Sync (%x)! Buffer too small ?", status);
}
@ -143,21 +159,23 @@ namespace PboPool {
}
}
// Binds buffer 0 to GL_PIXEL_UNPACK_BUFFER, i.e. leaves no pixel-unpack buffer bound.
void UnbindPbo() {
void UnbindPbo()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
// Advances the write offset past the region of the last Map() call.
// m_size is the requested size rounded up to a multiple of 64 bytes (set in Map()).
void EndTransfer() {
void EndTransfer()
{
m_offset += m_size;
}
}
} // namespace PboPool
GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap)
: m_clean(false), m_generate_mipmap(true), m_local_buffer(nullptr), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0), m_layer(0)
{
// OpenGL didn't like dimensions of size 0
m_size.x = std::max(1,w);
m_size.y = std::max(1,h);
m_size.x = std::max(1, w);
m_size.y = std::max(1, h);
m_format = format;
m_type = type;
m_fbo_read = fbo_read;
@ -166,7 +184,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
m_max_layer = 1;
// Bunch of constant parameter
switch (m_format) {
switch (m_format)
{
// 1 Channel integer
case GL_R32UI:
case GL_R32I:
@ -240,7 +259,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
ASSERT(0);
}
switch (m_type) {
switch (m_type)
{
case GSTexture::Backbuffer:
return; // backbuffer isn't a real texture
case GSTexture::Offscreen:
@ -249,7 +269,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
break;
case GSTexture::Texture:
// Only 32 bits input texture will be supported for mipmap
m_max_layer = mipmap && m_format == GL_RGBA8 ? (int)log2(std::max(w,h)) : 1;
m_max_layer = mipmap && m_format == GL_RGBA8 ? (int)log2(std::max(w, h)) : 1;
break;
case SparseRenderTarget:
case SparseDepthStencil:
@ -259,7 +279,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
break;
}
switch (m_format) {
switch (m_format)
{
case GL_R16UI:
case GL_R8:
m_sparse &= GLLoader::found_compatible_GL_ARB_sparse_texture2;
@ -294,21 +315,26 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
// Create a gl object (texture isn't allocated here)
glCreateTextures(GL_TEXTURE_2D, 1, &m_texture_id);
if (m_format == GL_R8) {
if (m_format == GL_R8)
{
// Emulate DX behavior; besides, it avoids special code in the shader to differentiate
// a palette texture from a GL_RGBA target or a GL_R texture.
glTextureParameteri(m_texture_id, GL_TEXTURE_SWIZZLE_A, GL_RED);
}
if (m_sparse) {
if (m_sparse)
{
GSVector2i old_size = m_size;
m_size = RoundUpPage(m_size);
if (m_size != old_size) {
if (m_size != old_size)
{
fprintf(stderr, "Sparse texture size (%dx%d) isn't a multiple of gpu page size (%dx%d)\n",
old_size.x, old_size.y, m_gpu_page_size.x, m_gpu_page_size.y);
}
glTextureParameteri(m_texture_id, GL_TEXTURE_SPARSE_ARB, true);
} else {
}
else
{
m_committed_size = m_size;
}
@ -316,7 +342,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
static int every_512 = 0;
GLState::available_vram -= m_mem_usage;
if ((GLState::available_vram < 0) && (every_512 % 512 == 0)) {
if ((GLState::available_vram < 0) && (every_512 % 512 == 0))
{
fprintf(stderr, "Available VRAM is very low (%lld), a crash is expected! Enable conservative buffer allocation or reduce upscaling!\n", GLState::available_vram);
every_512++;
// Pull emergency brake
@ -334,7 +361,8 @@ GSTextureOGL::~GSTextureOGL()
GLState::rt = 0;
if (m_texture_id == GLState::ds)
GLState::ds = 0;
for (size_t i = 0; i < countof(GLState::tex_unit); i++) {
for (size_t i = 0; i < countof(GLState::tex_unit); i++)
{
if (m_texture_id == GLState::tex_unit[i])
GLState::tex_unit[i] = 0;
}
@ -406,7 +434,8 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
// PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch
// Note: row_byte != pitch
for (int h = 0; h < r.height(); h++) {
for (int h = 0; h < r.height(); h++)
{
memcpy(map, src, row_byte);
map += row_byte;
src += pitch;
@ -434,13 +463,14 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y);
// Will need some investigation
ASSERT(r.width() != 0);
ASSERT(r.width() != 0);
ASSERT(r.height() != 0);
uint32 row_byte = r.width() << m_int_shift;
m.pitch = row_byte;
if (m_type == GSTexture::Offscreen) {
if (m_type == GSTexture::Offscreen)
{
// The fastest way would be to use a PBO to read the data asynchronously. Unfortunately the GSdx
// architecture needs the data right now.
@ -467,7 +497,9 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
m.bits = m_local_buffer;
return true;
} else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) {
}
else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget)
{
GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap
m_clean = false;
@ -477,7 +509,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
m.bits = (uint8*)PboPool::Map(map_size);
#ifdef ENABLE_OGL_DEBUG_MEM_BW
g_real_texture_upload_byte += map_size;
g_real_texture_upload_byte += map_size;
#endif
// Save the area for the unmap
@ -495,7 +527,8 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
void GSTextureOGL::Unmap()
{
if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) {
if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget)
{
PboPool::Unmap();
@ -514,7 +547,8 @@ void GSTextureOGL::Unmap()
void GSTextureOGL::GenerateMipmap()
{
if (m_generate_mipmap && m_max_layer > 1) {
if (m_generate_mipmap && m_max_layer > 1)
{
glGenerateTextureMipmap(m_texture_id);
m_generate_mipmap = false;
}
@ -524,12 +558,16 @@ void GSTextureOGL::CommitPages(const GSVector2i& region, bool commit)
{
GLState::available_vram += m_mem_usage;
if (commit) {
if (m_committed_size.x == 0) {
if (commit)
{
if (m_committed_size.x == 0)
{
// Nothing allocated so far
GL_INS("CommitPages initial %dx%d of %u", region.x, region.y, m_texture_id);
glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, 0, 0, region.x, region.y, 1, commit);
} else {
}
else
{
GL_INS("CommitPages extend %dx%d to %dx%d of %u", m_committed_size.x, m_committed_size.y, region.x, region.y, m_texture_id);
int w = region.x - m_committed_size.x;
int h = region.y - m_committed_size.y;
@ -539,8 +577,9 @@ void GSTextureOGL::CommitPages(const GSVector2i& region, bool commit)
glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, m_committed_size.y, 0, region.x, h, 1, commit);
}
m_committed_size = region;
} else {
}
else
{
// Release everything
GL_INS("CommitPages release of %u", m_texture_id);
@ -557,7 +596,7 @@ bool GSTextureOGL::Save(const std::string& fn)
{
// Collect the texture data
uint32 pitch = 4 * m_committed_size.x;
uint32 buf_size = pitch * m_committed_size.y * 2;// Note *2 for security (depth/stencil)
uint32 buf_size = pitch * m_committed_size.y * 2; // Note *2 for security (depth/stencil)
std::unique_ptr<uint8[]> image(new uint8[buf_size]);
#ifdef ENABLE_OGL_DEBUG
GSPng::Format fmt = GSPng::RGB_A_PNG;
@ -565,9 +604,12 @@ bool GSTextureOGL::Save(const std::string& fn)
GSPng::Format fmt = GSPng::RGB_PNG;
#endif
if (IsBackbuffer()) {
if (IsBackbuffer())
{
glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get());
} else if(IsDss()) {
}
else if (IsDss())
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_texture_id, 0);
@ -576,18 +618,23 @@ bool GSTextureOGL::Save(const std::string& fn)
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
fmt = GSPng::RGB_A_PNG;
} else if(m_format == GL_R32I) {
}
else if (m_format == GL_R32I)
{
// Note: 4.5 function used for accurate DATE
// barely used outside of dev and not sparse anyway
glGetTextureImage(m_texture_id, 0, GL_RED_INTEGER, GL_INT, buf_size, image.get());
fmt = GSPng::R32I_PNG;
} else {
}
else
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0);
if (m_format == GL_RGBA8) {
if (m_format == GL_RGBA8)
{
glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get());
}
else if (m_format == GL_R16UI)

View File

@ -23,68 +23,69 @@
#include "Renderers/Common/GSTexture.h"
namespace PboPool {
namespace PboPool
{
inline void BindPbo();
inline void UnbindPbo();
inline void Sync();
inline char* Map(uint32 size);
inline void Unmap();
inline uptr Offset();
inline void EndTransfer();
inline void Unmap();
inline uptr Offset();
inline void EndTransfer();
void Init();
void Destroy();
}
} // namespace PboPool
class GSTextureOGL final : public GSTexture
{
private:
GLuint m_texture_id; // the texture id
GLuint m_fbo_read;
bool m_clean;
bool m_generate_mipmap;
private:
GLuint m_texture_id; // the texture id
GLuint m_fbo_read;
bool m_clean;
bool m_generate_mipmap;
uint8* m_local_buffer;
// Avoid alignment constrain
//GSVector4i m_r;
int m_r_x;
int m_r_y;
int m_r_w;
int m_r_h;
int m_layer;
int m_max_layer;
uint8* m_local_buffer;
// Avoid alignment constrain
//GSVector4i m_r;
int m_r_x;
int m_r_y;
int m_r_w;
int m_r_h;
int m_layer;
int m_max_layer;
// internal opengl format/type/alignment
GLenum m_int_format;
GLenum m_int_type;
uint32 m_int_shift;
// internal opengl format/type/alignment
GLenum m_int_format;
GLenum m_int_type;
uint32 m_int_shift;
// Allow to track size of allocated memory
uint32 m_mem_usage;
// Allow to track size of allocated memory
uint32 m_mem_usage;
public:
explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap);
virtual ~GSTextureOGL();
public:
explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap);
virtual ~GSTextureOGL();
bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) final;
bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) final;
void Unmap() final;
void GenerateMipmap() final;
bool Save(const std::string& fn) final;
bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) final;
bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) final;
void Unmap() final;
void GenerateMipmap() final;
bool Save(const std::string& fn) final;
bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); }
bool IsDss() { return (m_type == GSTexture::DepthStencil || m_type == GSTexture::SparseDepthStencil); }
bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); }
bool IsDss() { return (m_type == GSTexture::DepthStencil || m_type == GSTexture::SparseDepthStencil); }
uint32 GetID() final { return m_texture_id; }
bool HasBeenCleaned() { return m_clean; }
void WasAttached() { m_clean = false; }
void WasCleaned() { m_clean = true; }
uint32 GetID() final { return m_texture_id; }
bool HasBeenCleaned() { return m_clean; }
void WasAttached() { m_clean = false; }
void WasCleaned() { m_clean = true; }
void Clear(const void* data);
void Clear(const void* data, const GSVector4i& area);
void Clear(const void* data);
void Clear(const void* data, const GSVector4i& area);
void CommitPages(const GSVector2i& region, bool commit) final;
void CommitPages(const GSVector2i& region, bool commit) final;
uint32 GetMemUsage();
uint32 GetMemUsage();
};

View File

@ -28,11 +28,12 @@ extern uint64 g_uniform_upload_byte;
#endif
class GSUniformBufferOGL {
GLuint m_buffer; // data object
GLuint m_index; // GLSL slot
uint32 m_size; // size of the data
uint8* m_cache; // content of the previous upload
class GSUniformBufferOGL
{
GLuint m_buffer; // data object
GLuint m_index; // GLSL slot
uint32 m_size; // size of the data
uint8* m_cache; // content of the previous upload
public:
GSUniformBufferOGL(const std::string& pretty_name, GLuint index, uint32 size)
@ -49,7 +50,8 @@ public:
void bind()
{
if (GLState::ubo != m_buffer) {
if (GLState::ubo != m_buffer)
{
GLState::ubo = m_buffer;
glBindBuffer(GL_UNIFORM_BUFFER, m_buffer);
}
@ -82,7 +84,8 @@ public:
void cache_upload(const void* src)
{
if (memcmp(m_cache, src, m_size) != 0) {
if (memcmp(m_cache, src, m_size) != 0)
{
memcpy(m_cache, src, m_size);
upload(src);
}
@ -95,18 +98,19 @@ public:
}
};
#define UBO_BUFFER_SIZE (4*1024*1024)
#define UBO_BUFFER_SIZE (4 * 1024 * 1024)
class GSUniformBufferStorageOGL {
GLuint m_buffer; // data object
GLuint m_index; // GLSL slot
uint32 m_size; // size of the data
class GSUniformBufferStorageOGL
{
GLuint m_buffer; // data object
GLuint m_index; // GLSL slot
uint32 m_size; // size of the data
uint8* m_buffer_ptr;
uint32 m_offset;
public:
GSUniformBufferStorageOGL(GLuint index, uint32 size) : m_index(index)
, m_size(size), m_offset(0)
GSUniformBufferStorageOGL(GLuint index, uint32 size)
: m_index(index) , m_size(size) , m_offset(0)
{
glGenBuffers(1, &m_buffer);
bind();
@ -116,7 +120,8 @@ public:
void bind()
{
if (GLState::ubo != m_buffer) {
if (GLState::ubo != m_buffer)
{
GLState::ubo = m_buffer;
glBindBuffer(GL_UNIFORM_BUFFER, m_buffer);
}
@ -130,7 +135,7 @@ public:
GLsizei buffer_size = UBO_BUFFER_SIZE;
glBufferStorage(GL_UNIFORM_BUFFER, buffer_size, NULL, create_flags);
m_buffer_ptr = (uint8*) glMapBufferRange(GL_UNIFORM_BUFFER, 0, buffer_size, map_flags);
m_buffer_ptr = (uint8*)glMapBufferRange(GL_UNIFORM_BUFFER, 0, buffer_size, map_flags);
ASSERT(m_buffer_ptr);
}
@ -159,7 +164,8 @@ public:
m_offset = 0;
}
~GSUniformBufferStorageOGL() {
~GSUniformBufferStorageOGL()
{
glDeleteBuffers(1, &m_buffer);
}
};

View File

@ -27,27 +27,29 @@
extern uint64 g_vertex_upload_byte;
#endif
struct GSInputLayoutOGL {
GLint location;
GLint size;
GLenum type;
struct GSInputLayoutOGL
{
GLint location;
GLint size;
GLenum type;
GLboolean normalize;
GLsizei stride;
const GLvoid* offset;
};
template<int STRIDE>
class GSBufferOGL {
template <int STRIDE>
class GSBufferOGL
{
size_t m_start;
size_t m_count;
size_t m_limit;
size_t m_quarter_shift;
const GLenum m_target;
const GLenum m_target;
GLuint m_buffer_name;
uint8* m_buffer_ptr;
uint8* m_buffer_ptr;
GLsync m_fence[5];
public:
public:
GSBufferOGL(GLenum target, size_t count)
: m_start(0)
, m_count(0)
@ -60,7 +62,8 @@ class GSBufferOGL {
m_limit = 1u << (1u + (size_t)std::log2(count - 1u));
m_quarter_shift = (size_t)std::log2(m_limit * STRIDE) - 2;
for (size_t i = 0; i < 5; i++) {
for (size_t i = 0; i < 5; i++)
{
m_fence[i] = 0;
}
@ -78,16 +81,19 @@ class GSBufferOGL {
const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags );
m_buffer_ptr = (uint8*) glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags);
if (!m_buffer_ptr) {
glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags);
m_buffer_ptr = (uint8*)glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags);
if (!m_buffer_ptr)
{
fprintf(stderr, "Failed to map buffer\n");
throw GSDXError();
}
}
~GSBufferOGL() {
for (size_t i = 0; i < 5; i++) {
~GSBufferOGL()
{
for (size_t i = 0; i < 5; i++)
{
glDeleteSync(m_fence[i]);
}
glDeleteBuffers(1, &m_buffer_name);
@ -108,14 +114,16 @@ class GSBufferOGL {
size_t offset = m_start * STRIDE;
size_t length = m_count * STRIDE;
if (m_count > (m_limit - m_start) ) {
if (m_count > (m_limit - m_start))
{
size_t current_chunk = offset >> m_quarter_shift;
#ifdef ENABLE_OGL_DEBUG_FENCE
fprintf(stderr, "%x: Wrap buffer\n", m_target);
fprintf(stderr, "%x: Insert a fence in chunk %zu\n", m_target, current_chunk);
#endif
ASSERT(current_chunk > 0 && current_chunk < 5);
if (m_fence[current_chunk] == 0) {
if (m_fence[current_chunk] == 0)
{
m_fence[current_chunk] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
@ -124,10 +132,12 @@ class GSBufferOGL {
offset = 0;
// Only check first chunk
if (m_fence[0]) {
if (m_fence[0])
{
#ifdef ENABLE_OGL_DEBUG_FENCE
GLenum status = glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
if (status != GL_ALREADY_SIGNALED) {
if (status != GL_ALREADY_SIGNALED)
{
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
}
#else
@ -141,13 +151,15 @@ class GSBufferOGL {
// Protect buffer with fences
size_t current_chunk = offset >> m_quarter_shift;
size_t next_chunk = (offset + length) >> m_quarter_shift;
for (size_t c = current_chunk + 1; c <= next_chunk; c++) {
for (size_t c = current_chunk + 1; c <= next_chunk; c++)
{
#ifdef ENABLE_OGL_DEBUG_FENCE
fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c-1);
fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c - 1);
#endif
ASSERT(c > 0 && c < 5);
m_fence[c-1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
if (m_fence[c]) {
m_fence[c - 1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
if (m_fence[c])
{
#ifdef ENABLE_OGL_DEBUG_FENCE
GLenum status = glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
#else
@ -157,7 +169,8 @@ class GSBufferOGL {
m_fence[c] = 0;
#ifdef ENABLE_OGL_DEBUG_FENCE
if (status != GL_ALREADY_SIGNALED) {
if (status != GL_ALREADY_SIGNALED)
{
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
}
#endif
@ -211,10 +224,10 @@ class GSBufferOGL {
}
size_t GetStart() { return m_start; }
};
class GSVertexBufferStateOGL {
class GSVertexBufferStateOGL
{
std::unique_ptr<GSBufferOGL<sizeof(GSVertexPT1)>> m_vb;
std::unique_ptr<GSBufferOGL<sizeof(uint32)>> m_ib;
@ -223,10 +236,11 @@ class GSVertexBufferStateOGL {
std::vector<GSInputLayoutOGL> m_layout;
// No copy constructor please
GSVertexBufferStateOGL(const GSVertexBufferStateOGL& ) = delete;
GSVertexBufferStateOGL(const GSVertexBufferStateOGL&) = delete;
public:
GSVertexBufferStateOGL(const std::vector<GSInputLayoutOGL>& layout) : m_topology(0), m_layout(layout)
GSVertexBufferStateOGL(const std::vector<GSInputLayoutOGL>& layout)
: m_topology(0), m_layout(layout)
{
glGenVertexArrays(1, &m_va);
glBindVertexArray(m_va);
@ -250,15 +264,20 @@ public:
void set_internal_format()
{
for (const auto &l : m_layout) {
for (const auto& l : m_layout)
{
// Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer
glEnableVertexAttribArray(l.location);
switch (l.type) {
switch (l.type)
{
case GL_UNSIGNED_SHORT:
case GL_UNSIGNED_INT:
if (l.normalize) {
if (l.normalize)
{
glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset);
} else {
}
else
{
// Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I)
glVertexAttribIPointer(l.location, l.size, l.type, l.stride, l.offset);
}
@ -280,19 +299,24 @@ public:
void DrawPrimitive(int offset, int count) { m_vb->Draw(m_topology, offset, count); }
void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart() ); }
void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart()); }
void DrawIndexedPrimitive(int offset, int count) { m_ib->Draw(m_topology, m_vb->GetStart(), offset, count ); }
void DrawIndexedPrimitive(int offset, int count) { m_ib->Draw(m_topology, m_vb->GetStart(), offset, count); }
void SetTopology(GLenum topology) { m_topology = topology; }
void* MapVB(size_t count) {
void *ptr;
while (true) {
try {
void* MapVB(size_t count)
{
void* ptr;
while (true)
{
try
{
ptr = m_vb->map(count);
break;
} catch (GSDXErrorGlVertexArrayTooSmall) {
}
catch (GSDXErrorGlVertexArrayTooSmall)
{
GL_INS("GL vertex buffer is too small");
m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count));
@ -304,12 +328,17 @@ public:
return ptr;
}
void UnmapVB() { m_vb->unmap(); }
void UploadVB(const void* vertices, size_t count) {
while (true) {
try {
void UploadVB(const void* vertices, size_t count)
{
while (true)
{
try
{
m_vb->upload(vertices, count);
break;
} catch (GSDXErrorGlVertexArrayTooSmall) {
}
catch (GSDXErrorGlVertexArrayTooSmall)
{
GL_INS("GL vertex buffer is too small");
m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count));
@ -319,12 +348,17 @@ public:
}
}
void UploadIB(const void* index, size_t count) {
while (true) {
try {
void UploadIB(const void* index, size_t count)
{
while (true)
{
try
{
m_ib->upload(index, count);
break;
} catch (GSDXErrorGlVertexArrayTooSmall) {
}
catch (GSDXErrorGlVertexArrayTooSmall)
{
GL_INS("GL index buffer is too small");
m_ib.reset(new GSBufferOGL<sizeof(uint32)>(GL_ELEMENT_ARRAY_BUFFER, count));
@ -336,5 +370,4 @@ public:
{
glDeleteVertexArrays(1, &m_va);
}
};

View File

@ -39,7 +39,7 @@ extern "C" {
#define APIENTRY
#endif
#ifndef APIENTRYP
#define APIENTRYP APIENTRY *
#define APIENTRYP APIENTRY*
#endif
#ifndef GLAPI
#define GLAPI extern
@ -47,7 +47,7 @@ extern "C" {
#ifndef GL_VERSION_4_3
#define GL_VERSION_4_3 1
typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam);
typedef void(APIENTRY* GLDEBUGPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message, const void* userParam);
#define GL_NUM_SHADING_LANGUAGE_VERSIONS 0x82E9
#define GL_VERTEX_ATTRIB_ARRAY_LONG 0x874E
#define GL_COMPRESSED_RGB8_ETC2 0x9274
@ -306,49 +306,49 @@ typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum
#define GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D9
#define GL_MAX_VERTEX_ATTRIB_BINDINGS 0x82DA
#define GL_VERTEX_BINDING_BUFFER 0x8F4F
typedef void (APIENTRYP PFNGLCLEARBUFFERDATAPROC) (GLenum target, GLenum internalformat, GLenum format, GLenum type, const void *data);
typedef void (APIENTRYP PFNGLCLEARBUFFERSUBDATAPROC) (GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void *data);
typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC) (GLintptr indirect);
typedef void (APIENTRYP PFNGLCOPYIMAGESUBDATAPROC) (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth);
typedef void (APIENTRYP PFNGLFRAMEBUFFERPARAMETERIPROC) (GLenum target, GLenum pname, GLint param);
typedef void (APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params);
typedef void (APIENTRYP PFNGLGETINTERNALFORMATI64VPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint64 *params);
typedef void (APIENTRYP PFNGLINVALIDATETEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth);
typedef void (APIENTRYP PFNGLINVALIDATETEXIMAGEPROC) (GLuint texture, GLint level);
typedef void (APIENTRYP PFNGLINVALIDATEBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizeiptr length);
typedef void (APIENTRYP PFNGLINVALIDATEBUFFERDATAPROC) (GLuint buffer);
typedef void (APIENTRYP PFNGLINVALIDATEFRAMEBUFFERPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments);
typedef void (APIENTRYP PFNGLINVALIDATESUBFRAMEBUFFERPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTPROC) (GLenum mode, const void *indirect, GLsizei drawcount, GLsizei stride);
typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTPROC) (GLenum mode, GLenum type, const void *indirect, GLsizei drawcount, GLsizei stride);
typedef void (APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC) (GLuint program, GLenum programInterface, GLenum pname, GLint *params);
typedef GLuint (APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name);
typedef void (APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name);
typedef void (APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params);
typedef GLint (APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC) (GLuint program, GLenum programInterface, const GLchar *name);
typedef GLint (APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name);
typedef void (APIENTRYP PFNGLSHADERSTORAGEBLOCKBINDINGPROC) (GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding);
typedef void (APIENTRYP PFNGLTEXBUFFERRANGEPROC) (GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void (APIENTRYP PFNGLTEXSTORAGE2DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations);
typedef void (APIENTRYP PFNGLTEXSTORAGE3DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
typedef void (APIENTRYP PFNGLTEXTUREVIEWPROC) (GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel, GLuint numlevels, GLuint minlayer, GLuint numlayers);
typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERPROC) (GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
typedef void (APIENTRYP PFNGLVERTEXATTRIBFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
typedef void (APIENTRYP PFNGLVERTEXATTRIBIFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void (APIENTRYP PFNGLVERTEXATTRIBLFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void (APIENTRYP PFNGLVERTEXATTRIBBINDINGPROC) (GLuint attribindex, GLuint bindingindex);
typedef void (APIENTRYP PFNGLVERTEXBINDINGDIVISORPROC) (GLuint bindingindex, GLuint divisor);
typedef void (APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC) (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled);
typedef void (APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC) (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf);
typedef void (APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC) (GLDEBUGPROC callback, const void *userParam);
typedef GLuint (APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC) (GLuint count, GLsizei bufSize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog);
typedef void (APIENTRYP PFNGLPUSHDEBUGGROUPPROC) (GLenum source, GLuint id, GLsizei length, const GLchar *message);
typedef void (APIENTRYP PFNGLPOPDEBUGGROUPPROC) (void);
typedef void (APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label);
typedef void (APIENTRYP PFNGLGETOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label);
typedef void (APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, GLsizei length, const GLchar *label);
typedef void (APIENTRYP PFNGLGETOBJECTPTRLABELPROC) (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label);
typedef void(APIENTRYP PFNGLCLEARBUFFERDATAPROC)(GLenum target, GLenum internalformat, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
typedef void(APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
typedef void(APIENTRYP PFNGLCOPYIMAGESUBDATAPROC)(GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth);
typedef void(APIENTRYP PFNGLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETINTERNALFORMATI64VPROC)(GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint64* params);
typedef void(APIENTRYP PFNGLINVALIDATETEXSUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNGLINVALIDATETEXIMAGEPROC)(GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLINVALIDATEBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length);
typedef void(APIENTRYP PFNGLINVALIDATEBUFFERDATAPROC)(GLuint buffer);
typedef void(APIENTRYP PFNGLINVALIDATEFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum* attachments);
typedef void(APIENTRYP PFNGLINVALIDATESUBFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum* attachments, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTPROC)(GLenum mode, const void* indirect, GLsizei drawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTPROC)(GLenum mode, GLenum type, const void* indirect, GLsizei drawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC)(GLuint program, GLenum programInterface, GLenum pname, GLint* params);
typedef GLuint(APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC)(GLuint program, GLenum programInterface, const GLchar* name);
typedef void(APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei* length, GLchar* name);
typedef void(APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum* props, GLsizei bufSize, GLsizei* length, GLint* params);
typedef GLint(APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC)(GLuint program, GLenum programInterface, const GLchar* name);
typedef GLint(APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC)(GLuint program, GLenum programInterface, const GLchar* name);
typedef void(APIENTRYP PFNGLSHADERSTORAGEBLOCKBINDINGPROC)(GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding);
typedef void(APIENTRYP PFNGLTEXBUFFERRANGEPROC)(GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLTEXSTORAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXSTORAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXTUREVIEWPROC)(GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel, GLuint numlevels, GLuint minlayer, GLuint numlayers);
typedef void(APIENTRYP PFNGLBINDVERTEXBUFFERPROC)(GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
typedef void(APIENTRYP PFNGLVERTEXATTRIBFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXATTRIBIFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXATTRIBLFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXATTRIBBINDINGPROC)(GLuint attribindex, GLuint bindingindex);
typedef void(APIENTRYP PFNGLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLuint divisor);
typedef void(APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC)(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint* ids, GLboolean enabled);
typedef void(APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* buf);
typedef void(APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC)(GLDEBUGPROC callback, const void* userParam);
typedef GLuint(APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC)(GLuint count, GLsizei bufSize, GLenum* sources, GLenum* types, GLuint* ids, GLenum* severities, GLsizei* lengths, GLchar* messageLog);
typedef void(APIENTRYP PFNGLPUSHDEBUGGROUPPROC)(GLenum source, GLuint id, GLsizei length, const GLchar* message);
typedef void(APIENTRYP PFNGLPOPDEBUGGROUPPROC)(void);
typedef void(APIENTRYP PFNGLOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei length, const GLchar* label);
typedef void(APIENTRYP PFNGLGETOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei bufSize, GLsizei* length, GLchar* label);
typedef void(APIENTRYP PFNGLOBJECTPTRLABELPROC)(const void* ptr, GLsizei length, const GLchar* label);
typedef void(APIENTRYP PFNGLGETOBJECTPTRLABELPROC)(const void* ptr, GLsizei bufSize, GLsizei* length, GLchar* label);
#endif /* GL_VERSION_4_3 */
#ifndef GL_VERSION_4_4
@ -372,15 +372,15 @@ typedef void (APIENTRYP PFNGLGETOBJECTPTRLABELPROC) (const void *ptr, GLsizei bu
#define GL_QUERY_BUFFER_BINDING 0x9193
#define GL_QUERY_RESULT_NO_WAIT 0x9194
#define GL_MIRROR_CLAMP_TO_EDGE 0x8743
typedef void (APIENTRYP PFNGLBUFFERSTORAGEPROC) (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags);
typedef void (APIENTRYP PFNGLCLEARTEXIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, const void *data);
typedef void (APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data);
typedef void (APIENTRYP PFNGLBINDBUFFERSBASEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers);
typedef void (APIENTRYP PFNGLBINDBUFFERSRANGEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizeiptr *sizes);
typedef void (APIENTRYP PFNGLBINDTEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures);
typedef void (APIENTRYP PFNGLBINDSAMPLERSPROC) (GLuint first, GLsizei count, const GLuint *samplers);
typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures);
typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERSPROC) (GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides);
typedef void(APIENTRYP PFNGLBUFFERSTORAGEPROC)(GLenum target, GLsizeiptr size, const void* data, GLbitfield flags);
typedef void(APIENTRYP PFNGLCLEARTEXIMAGEPROC)(GLuint texture, GLint level, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLBINDBUFFERSBASEPROC)(GLenum target, GLuint first, GLsizei count, const GLuint* buffers);
typedef void(APIENTRYP PFNGLBINDBUFFERSRANGEPROC)(GLenum target, GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets, const GLsizeiptr* sizes);
typedef void(APIENTRYP PFNGLBINDTEXTURESPROC)(GLuint first, GLsizei count, const GLuint* textures);
typedef void(APIENTRYP PFNGLBINDSAMPLERSPROC)(GLuint first, GLsizei count, const GLuint* samplers);
typedef void(APIENTRYP PFNGLBINDIMAGETEXTURESPROC)(GLuint first, GLsizei count, const GLuint* textures);
typedef void(APIENTRYP PFNGLBINDVERTEXBUFFERSPROC)(GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets, const GLsizei* strides);
#endif /* GL_VERSION_4_4 */
#ifndef GL_VERSION_4_5
@ -407,116 +407,116 @@ typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERSPROC) (GLuint first, GLsizei count
#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004
#define GL_CONTEXT_RELEASE_BEHAVIOR 0x82FB
#define GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH 0x82FC
typedef void (APIENTRYP PFNGLCLIPCONTROLPROC) (GLenum origin, GLenum depth);
typedef void (APIENTRYP PFNGLCREATETRANSFORMFEEDBACKSPROC) (GLsizei n, GLuint *ids);
typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC) (GLuint xfb, GLuint index, GLuint buffer);
typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC) (GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKIVPROC) (GLuint xfb, GLenum pname, GLint *param);
typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKI_VPROC) (GLuint xfb, GLenum pname, GLuint index, GLint *param);
typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKI64_VPROC) (GLuint xfb, GLenum pname, GLuint index, GLint64 *param);
typedef void (APIENTRYP PFNGLCREATEBUFFERSPROC) (GLsizei n, GLuint *buffers);
typedef void (APIENTRYP PFNGLNAMEDBUFFERSTORAGEPROC) (GLuint buffer, GLsizeiptr size, const void *data, GLbitfield flags);
typedef void (APIENTRYP PFNGLNAMEDBUFFERDATAPROC) (GLuint buffer, GLsizeiptr size, const void *data, GLenum usage);
typedef void (APIENTRYP PFNGLNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data);
typedef void (APIENTRYP PFNGLCOPYNAMEDBUFFERSUBDATAPROC) (GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERDATAPROC) (GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void *data);
typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void *data);
typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERPROC) (GLuint buffer, GLenum access);
typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access);
typedef GLboolean (APIENTRYP PFNGLUNMAPNAMEDBUFFERPROC) (GLuint buffer);
typedef void (APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr offset, GLsizeiptr length);
typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVPROC) (GLuint buffer, GLenum pname, GLint *params);
typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERI64VPROC) (GLuint buffer, GLenum pname, GLint64 *params);
typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVPROC) (GLuint buffer, GLenum pname, void **params);
typedef void (APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, void *data);
typedef void (APIENTRYP PFNGLCREATEFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers);
typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC) (GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC) (GLuint framebuffer, GLenum pname, GLint param);
typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level);
typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer);
typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC) (GLuint framebuffer, GLenum buf);
typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC) (GLuint framebuffer, GLsizei n, const GLenum *bufs);
typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC) (GLuint framebuffer, GLenum src);
typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments);
typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint *value);
typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint *value);
typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat *value);
typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFIPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
typedef void (APIENTRYP PFNGLBLITNAMEDFRAMEBUFFERPROC) (GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
typedef GLenum (APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC) (GLuint framebuffer, GLenum target);
typedef void (APIENTRYP PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC) (GLuint framebuffer, GLenum pname, GLint *param);
typedef void (APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC) (GLuint framebuffer, GLenum attachment, GLenum pname, GLint *params);
typedef void (APIENTRYP PFNGLCREATERENDERBUFFERSPROC) (GLsizei n, GLuint *renderbuffers);
typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEPROC) (GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height);
typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC) (GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
typedef void (APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC) (GLuint renderbuffer, GLenum pname, GLint *params);
typedef void (APIENTRYP PFNGLCREATETEXTURESPROC) (GLenum target, GLsizei n, GLuint *textures);
typedef void (APIENTRYP PFNGLTEXTUREBUFFERPROC) (GLuint texture, GLenum internalformat, GLuint buffer);
typedef void (APIENTRYP PFNGLTEXTUREBUFFERRANGEPROC) (GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void (APIENTRYP PFNGLTEXTURESTORAGE1DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width);
typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC) (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations);
typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC) (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels);
typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels);
typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels);
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data);
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data);
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data);
typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFPROC) (GLuint texture, GLenum pname, GLfloat param);
typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFVPROC) (GLuint texture, GLenum pname, const GLfloat *param);
typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIPROC) (GLuint texture, GLenum pname, GLint param);
typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIIVPROC) (GLuint texture, GLenum pname, const GLint *params);
typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIUIVPROC) (GLuint texture, GLenum pname, const GLuint *params);
typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIVPROC) (GLuint texture, GLenum pname, const GLint *param);
typedef void (APIENTRYP PFNGLGENERATETEXTUREMIPMAPPROC) (GLuint texture);
typedef void (APIENTRYP PFNGLBINDTEXTUREUNITPROC) (GLuint unit, GLuint texture);
typedef void (APIENTRYP PFNGLGETTEXTUREIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels);
typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC) (GLuint texture, GLint level, GLsizei bufSize, void *pixels);
typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVPROC) (GLuint texture, GLint level, GLenum pname, GLfloat *params);
typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVPROC) (GLuint texture, GLint level, GLenum pname, GLint *params);
typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERFVPROC) (GLuint texture, GLenum pname, GLfloat *params);
typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIIVPROC) (GLuint texture, GLenum pname, GLint *params);
typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVPROC) (GLuint texture, GLenum pname, GLuint *params);
typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIVPROC) (GLuint texture, GLenum pname, GLint *params);
typedef void (APIENTRYP PFNGLCREATEVERTEXARRAYSPROC) (GLsizei n, GLuint *arrays);
typedef void (APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index);
typedef void (APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index);
typedef void (APIENTRYP PFNGLVERTEXARRAYELEMENTBUFFERPROC) (GLuint vaobj, GLuint buffer);
typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERPROC) (GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERSPROC) (GLuint vaobj, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides);
typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBBINDINGPROC) (GLuint vaobj, GLuint attribindex, GLuint bindingindex);
typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBIFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBLFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void (APIENTRYP PFNGLVERTEXARRAYBINDINGDIVISORPROC) (GLuint vaobj, GLuint bindingindex, GLuint divisor);
typedef void (APIENTRYP PFNGLGETVERTEXARRAYIVPROC) (GLuint vaobj, GLenum pname, GLint *param);
typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXEDIVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint *param);
typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXED64IVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint64 *param);
typedef void (APIENTRYP PFNGLCREATESAMPLERSPROC) (GLsizei n, GLuint *samplers);
typedef void (APIENTRYP PFNGLCREATEPROGRAMPIPELINESPROC) (GLsizei n, GLuint *pipelines);
typedef void (APIENTRYP PFNGLCREATEQUERIESPROC) (GLenum target, GLsizei n, GLuint *ids);
typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTI64VPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTIVPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTUI64VPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTUIVPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void (APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC) (GLbitfield barriers);
typedef void (APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void *pixels);
typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void *pixels);
typedef GLenum (APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC) (void);
typedef void (APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint lod, GLsizei bufSize, void *pixels);
typedef void (APIENTRYP PFNGLGETNTEXIMAGEPROC) (GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels);
typedef void (APIENTRYP PFNGLGETNUNIFORMDVPROC) (GLuint program, GLint location, GLsizei bufSize, GLdouble *params);
typedef void (APIENTRYP PFNGLGETNUNIFORMFVPROC) (GLuint program, GLint location, GLsizei bufSize, GLfloat *params);
typedef void (APIENTRYP PFNGLGETNUNIFORMIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params);
typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLuint *params);
typedef void (APIENTRYP PFNGLREADNPIXELSPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data);
typedef void (APIENTRYP PFNGLTEXTUREBARRIERPROC) (void);
typedef void(APIENTRYP PFNGLCLIPCONTROLPROC)(GLenum origin, GLenum depth);
typedef void(APIENTRYP PFNGLCREATETRANSFORMFEEDBACKSPROC)(GLsizei n, GLuint* ids);
typedef void(APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC)(GLuint xfb, GLuint index, GLuint buffer);
typedef void(APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC)(GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKIVPROC)(GLuint xfb, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKI_VPROC)(GLuint xfb, GLenum pname, GLuint index, GLint* param);
typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKI64_VPROC)(GLuint xfb, GLenum pname, GLuint index, GLint64* param);
typedef void(APIENTRYP PFNGLCREATEBUFFERSPROC)(GLsizei n, GLuint* buffers);
typedef void(APIENTRYP PFNGLNAMEDBUFFERSTORAGEPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLbitfield flags);
typedef void(APIENTRYP PFNGLNAMEDBUFFERDATAPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLenum usage);
typedef void(APIENTRYP PFNGLNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, const void* data);
typedef void(APIENTRYP PFNGLCOPYNAMEDBUFFERSUBDATAPROC)(GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERDATAPROC)(GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void* data);
typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFERPROC)(GLuint buffer, GLenum access);
typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access);
typedef GLboolean(APIENTRYP PFNGLUNMAPNAMEDBUFFERPROC)(GLuint buffer);
typedef void(APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVPROC)(GLuint buffer, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERI64VPROC)(GLuint buffer, GLenum pname, GLint64* params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVPROC)(GLuint buffer, GLenum pname, void** params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, void* data);
typedef void(APIENTRYP PFNGLCREATEFRAMEBUFFERSPROC)(GLsizei n, GLuint* framebuffers);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC)(GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC)(GLuint framebuffer, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC)(GLuint framebuffer, GLenum buf);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC)(GLuint framebuffer, GLsizei n, const GLenum* bufs);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC)(GLuint framebuffer, GLenum src);
typedef void(APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC)(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments);
typedef void(APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC)(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERIVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint* value);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint* value);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat* value);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFIPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
typedef void(APIENTRYP PFNGLBLITNAMEDFRAMEBUFFERPROC)(GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
typedef GLenum(APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC)(GLuint framebuffer, GLenum target);
typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC)(GLuint framebuffer, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLuint framebuffer, GLenum attachment, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLCREATERENDERBUFFERSPROC)(GLsizei n, GLuint* renderbuffers);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEPROC)(GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC)(GLuint renderbuffer, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLCREATETEXTURESPROC)(GLenum target, GLsizei n, GLuint* textures);
typedef void(APIENTRYP PFNGLTEXTUREBUFFERPROC)(GLuint texture, GLenum internalformat, GLuint buffer);
typedef void(APIENTRYP PFNGLTEXTUREBUFFERRANGEPROC)(GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE1DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC)(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC)(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFPROC)(GLuint texture, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFVPROC)(GLuint texture, GLenum pname, const GLfloat* param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIPROC)(GLuint texture, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIIVPROC)(GLuint texture, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIUIVPROC)(GLuint texture, GLenum pname, const GLuint* params);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIVPROC)(GLuint texture, GLenum pname, const GLint* param);
typedef void(APIENTRYP PFNGLGENERATETEXTUREMIPMAPPROC)(GLuint texture);
typedef void(APIENTRYP PFNGLBINDTEXTUREUNITPROC)(GLuint unit, GLuint texture);
typedef void(APIENTRYP PFNGLGETTEXTUREIMAGEPROC)(GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC)(GLuint texture, GLint level, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVPROC)(GLuint texture, GLint level, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVPROC)(GLuint texture, GLint level, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERFVPROC)(GLuint texture, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIIVPROC)(GLuint texture, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVPROC)(GLuint texture, GLenum pname, GLuint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIVPROC)(GLuint texture, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLCREATEVERTEXARRAYSPROC)(GLsizei n, GLuint* arrays);
typedef void(APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBPROC)(GLuint vaobj, GLuint index);
typedef void(APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBPROC)(GLuint vaobj, GLuint index);
typedef void(APIENTRYP PFNGLVERTEXARRAYELEMENTBUFFERPROC)(GLuint vaobj, GLuint buffer);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERPROC)(GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERSPROC)(GLuint vaobj, GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets, const GLsizei* strides);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBBINDINGPROC)(GLuint vaobj, GLuint attribindex, GLuint bindingindex);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBIFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBLFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYBINDINGDIVISORPROC)(GLuint vaobj, GLuint bindingindex, GLuint divisor);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYIVPROC)(GLuint vaobj, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYINDEXEDIVPROC)(GLuint vaobj, GLuint index, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYINDEXED64IVPROC)(GLuint vaobj, GLuint index, GLenum pname, GLint64* param);
typedef void(APIENTRYP PFNGLCREATESAMPLERSPROC)(GLsizei n, GLuint* samplers);
typedef void(APIENTRYP PFNGLCREATEPROGRAMPIPELINESPROC)(GLsizei n, GLuint* pipelines);
typedef void(APIENTRYP PFNGLCREATEQUERIESPROC)(GLenum target, GLsizei n, GLuint* ids);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTI64VPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTIVPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTUI64VPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTUIVPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC)(GLbitfield barriers);
typedef void(APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void* pixels);
typedef GLenum(APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC)(void);
typedef void(APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint lod, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETNTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETNUNIFORMDVPROC)(GLuint program, GLint location, GLsizei bufSize, GLdouble* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMFVPROC)(GLuint program, GLint location, GLsizei bufSize, GLfloat* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLint* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint* params);
typedef void(APIENTRYP PFNGLREADNPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void* data);
typedef void(APIENTRYP PFNGLTEXTUREBARRIERPROC)(void);
#endif /* GL_VERSION_4_5 */
#ifndef GL_VERSION_4_6
@ -543,10 +543,10 @@ typedef void (APIENTRYP PFNGLTEXTUREBARRIERPROC) (void);
#define GL_MAX_TEXTURE_MAX_ANISOTROPY 0x84FF
#define GL_TRANSFORM_FEEDBACK_OVERFLOW 0x82EC
#define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW 0x82ED
typedef void (APIENTRYP PFNGLSPECIALIZESHADERPROC) (GLuint shader, const GLchar *pEntryPoint, GLuint numSpecializationConstants, const GLuint *pConstantIndex, const GLuint *pConstantValue);
typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC) (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC) (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
typedef void (APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC) (GLfloat factor, GLfloat units, GLfloat clamp);
typedef void(APIENTRYP PFNGLSPECIALIZESHADERPROC)(GLuint shader, const GLchar* pEntryPoint, GLuint numSpecializationConstants, const GLuint* pConstantIndex, const GLuint* pConstantValue);
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC)(GLenum mode, const void* indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC)(GLenum mode, GLenum type, const void* indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC)(GLfloat factor, GLfloat units, GLfloat clamp);
#endif /* GL_VERSION_4_6 */
#ifdef __cplusplus

File diff suppressed because it is too large Load Diff

View File

@ -43,23 +43,23 @@ protected:
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
template<class T, bool masked>
template <class T, bool masked>
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked>
template <class T, bool masked>
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
template<class T, bool masked>
template <class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
#else
#else
template<class T, bool masked>
template <class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
#endif
#endif
public:
GSDrawScanline();
@ -78,13 +78,16 @@ public:
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
bool IsEdge() const {return m_global.sel.aa1;}
bool IsRect() const {return m_global.sel.IsSolidRect();}
bool IsEdge() const { return m_global.sel.aa1; }
bool IsRect() const { return m_global.sel.IsSolidRect(); }
template<class T> bool TestAlpha(T& test, T& fm, T& zm, const T& ga);
template<class T> void WritePixel(const T& src, int addr, int i, uint32 psm);
#endif
void PrintStats() {m_ds_map.PrintStats();}
void PrintStats()
{
m_ds_map.PrintStats();
}
};

View File

@ -26,7 +26,7 @@
#else
void GSDrawScanlineCodeGenerator::Generate()
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
Generate_AVX();
else
Generate_SSE();
@ -40,21 +40,24 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key
{
m_sel.key = key;
if(m_sel.breakpoint)
if (m_sel.breakpoint)
db(0xCC);
try {
try
{
Generate();
} catch (std::exception& e) {
}
catch (std::exception& e)
{
fprintf(stderr, "ERR:GSDrawScanlineCodeGenerator %s\n", e.what());
}
}
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
if(shift == 0)
if (shift == 0)
{
vpmulhrsw(a, f);
}
@ -63,11 +66,10 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uin
vpsllw(a, shift + 1);
vpmulhw(a, f);
}
}
else
{
if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3))
if (shift == 0 && m_cpu.has(util::Cpu::tSSSE3))
{
pmulhrsw(a, f);
}
@ -81,7 +83,7 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uin
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpsubw(a, b);
modulate16(a, f, shift);
@ -97,7 +99,7 @@ void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm&
void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpsubw(a, b);
vpmullw(a, f);
@ -115,7 +117,7 @@ void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm
void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpblendw(a, b, 0xaa);
}
@ -127,13 +129,14 @@ void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& t
void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpackuswb(a, a);
#if _M_SSE >= 0x501
// Greg: why ?
if(m_cpu.has(util::Cpu::tAVX2)) {
if (m_cpu.has(util::Cpu::tAVX2))
{
ASSERT(a.isYMM());
vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
}
@ -152,7 +155,7 @@ void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test)
{
uint32 mask = test.isYMM() ? 0xffffffff : 0xffff;
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpmovmskb(eax, test);
cmp(eax, mask);
@ -168,7 +171,7 @@ void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test)
void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpand(b, mask);
vpandn(mask, a);
@ -185,7 +188,7 @@ void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& m
void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm& mask)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpand(b, mask);
vpandn(mask, a);
@ -201,7 +204,7 @@ void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm&
void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
vpblendvb(a, a, b, xmm0);
else
pblendvb(a, b);
@ -209,7 +212,7 @@ void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
{
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
vpblendvb(b, a, b, xmm0);
}
@ -225,15 +228,20 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
// l = src & 0xFF; (1 left shift + 1 right shift)
// h = (src >> 8) & 0xFF; (1 right shift)
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
{
if (src == h) {
if (src == h)
{
vpsllw(l, src, 8);
vpsrlw(h, 8);
} else if (src == l) {
}
else if (src == l)
{
vpsrlw(h, src, 8);
vpsllw(l, 8);
} else {
}
else
{
vpsllw(l, src, 8);
vpsrlw(h, src, 8);
}
@ -241,11 +249,16 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
}
else
{
if (src == h) {
if (src == h)
{
movdqa(l, src);
} else if (src == l) {
}
else if (src == l)
{
movdqa(h, src);
} else {
}
else
{
movdqa(l, src);
movdqa(h, src);
}

View File

@ -35,7 +35,7 @@ using namespace Xbyak;
class GSDrawScanlineCodeGenerator : public GSCodeGenerator
{
void operator = (const GSDrawScanlineCodeGenerator&);
void operator=(const GSDrawScanlineCodeGenerator&);
GSScanlineSelector m_sel;
GSScanlineLocalData& m_local;
@ -43,7 +43,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void Generate();
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
void Init();
void Step();
@ -71,7 +71,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void ReadTexel(int pixels, int mip_offset = 0);
void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i);
#else
#else
void Generate_SSE();
void Init_SSE();
@ -94,7 +94,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void WriteZBuf_SSE();
void AlphaBlend_SSE();
void WriteFrame_SSE();
void ReadPixel_SSE(const Xmm& dst, const RegLong& addr);
void ReadPixel_SSE(const Xmm& dst, const RegLong& addr);
void WritePixel_SSE(const Xmm& src, const RegLong& addr, const Reg8& mask, bool fast, int psm, int fz);
void WritePixel_SSE(const Xmm& src, const RegLong& addr, uint8 i, int psm);
void ReadTexel_SSE(int pixels, int mip_offset = 0);
@ -121,13 +121,13 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void WriteZBuf_AVX();
void AlphaBlend_AVX();
void WriteFrame_AVX();
void ReadPixel_AVX(const Xmm& dst, const RegLong& addr);
void ReadPixel_AVX(const Xmm& dst, const RegLong& addr);
void WritePixel_AVX(const Xmm& src, const RegLong& addr, const Reg8& mask, bool fast, int psm, int fz);
void WritePixel_AVX(const Xmm& src, const RegLong& addr, uint8 i, int psm);
void ReadTexel_AVX(int pixels, int mip_offset = 0);
void ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i);
#endif
#endif
void modulate16(const Xmm& a, const Operand& f, uint8 shift);
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift);

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,8 @@
int GSRasterizerData::s_counter = 0;
static int compute_best_thread_height(int threads) {
static int compute_best_thread_height(int threads)
{
// - for more threads screen segments should be smaller to better distribute the pixels
// - but not too small to keep the threading overhead low
// - ideal value between 3 and 5, or log2(64 / number of threads)
@ -57,9 +58,9 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
int row = 0;
while(row < rows)
while (row < rows)
{
for(int i = 0; i < threads; i++, row++)
for (int i = 0; i < threads; i++, row++)
{
m_scanline[row] = i == id ? 1 : 0;
}
@ -70,7 +71,8 @@ GSRasterizer::~GSRasterizer()
{
_aligned_free(m_scanline);
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
if (m_edge.buff != NULL)
vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
delete m_ds;
}
@ -89,9 +91,9 @@ bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
top = top >> m_thread_height;
bottom = (bottom + (1 << m_thread_height) - 1) >> m_thread_height;
while(top < bottom)
while (top < bottom)
{
if(m_scanline[top++])
if (m_scanline[top++])
{
return true;
}
@ -104,9 +106,10 @@ int GSRasterizer::FindMyNextScanline(int top) const
{
int i = top >> m_thread_height;
if(m_scanline[i] == 0)
if (m_scanline[i] == 0)
{
while(m_scanline[++i] == 0);
while (m_scanline[++i] == 0)
;
top = i << m_thread_height;
}
@ -123,7 +126,7 @@ int GSRasterizer::GetPixels(bool reset)
{
int pixels = m_pixels.sum;
if(reset)
if (reset)
{
m_pixels.sum = 0;
}
@ -135,7 +138,8 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
if (data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0)
return;
m_pixels.actual = 0;
m_pixels.total = 0;
@ -158,73 +162,91 @@ void GSRasterizer::Draw(GSRasterizerData* data)
m_fscissor_x = GSVector4(data->scissor).xzxz();
m_fscissor_y = GSVector4(data->scissor).ywyw();
switch(data->primclass)
switch (data->primclass)
{
case GS_POINT_CLASS:
case GS_POINT_CLASS:
if(scissor_test)
{
DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
}
else
{
DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
}
if (scissor_test)
{
DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
}
else
{
DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
}
break;
break;
case GS_LINE_CLASS:
case GS_LINE_CLASS:
if(index != NULL)
{
do {DrawLine(vertex, index); index += 2;}
while(index < index_end);
}
else
{
do {DrawLine(vertex, tmp_index); vertex += 2;}
while(vertex < vertex_end);
}
if (index != NULL)
{
do
{
DrawLine(vertex, index);
index += 2;
} while (index < index_end);
}
else
{
do
{
DrawLine(vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
}
break;
break;
case GS_TRIANGLE_CLASS:
case GS_TRIANGLE_CLASS:
if(index != NULL)
{
do {DrawTriangle(vertex, index); index += 3;}
while(index < index_end);
}
else
{
do {DrawTriangle(vertex, tmp_index); vertex += 3;}
while(vertex < vertex_end);
}
if (index != NULL)
{
do
{
DrawTriangle(vertex, index);
index += 3;
} while (index < index_end);
}
else
{
do
{
DrawTriangle(vertex, tmp_index);
vertex += 3;
} while (vertex < vertex_end);
}
break;
break;
case GS_SPRITE_CLASS:
case GS_SPRITE_CLASS:
if(index != NULL)
{
do {DrawSprite(vertex, index); index += 2;}
while(index < index_end);
}
else
{
do {DrawSprite(vertex, tmp_index); vertex += 2;}
while(vertex < vertex_end);
}
if (index != NULL)
{
do
{
DrawSprite(vertex, index);
index += 2;
} while (index < index_end);
}
else
{
do
{
DrawSprite(vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
}
break;
break;
default:
__assume(0);
default:
__assume(0);
}
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
_mm256_zeroupper();
#endif
#endif
data->pixels = m_pixels.actual;
@ -235,20 +257,20 @@ void GSRasterizer::Draw(GSRasterizerData* data)
m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total);
}
template<bool scissor_test>
template <bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
{
if(index != NULL)
if (index != NULL)
{
for(int i = 0; i < index_count; i++, index++)
for (int i = 0; i < index_count; i++, index++)
{
const GSVertexSW& v = vertex[*index];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
if (!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
if(IsOneOfMyScanlines(p.y))
if (IsOneOfMyScanlines(p.y))
{
m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
@ -261,15 +283,15 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
{
uint32 tmp_index[1] = {0};
for(int i = 0; i < vertex_count; i++, vertex++)
for (int i = 0; i < vertex_count; i++, vertex++)
{
const GSVertexSW& v = vertex[0];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
if (!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
if(IsOneOfMyScanlines(p.y))
if (IsOneOfMyScanlines(p.y))
{
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
@ -291,7 +313,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
int i = (dp < dp.yxwz()).mask() & 1; // |dx| <= |dy|
if(m_ds->HasEdge())
if (m_ds->HasEdge())
{
DrawEdge(v0, v1, dv, i, 0);
DrawEdge(v0, v1, dv, i, 1);
@ -303,9 +325,9 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
GSVector4i dpi(dp);
if(dpi.y == 0)
if (dpi.y == 0)
{
if(dpi.x > 0)
if (dpi.x > 0)
{
// shortcut for horizontal lines
@ -319,7 +341,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
GSVector4i p(scan.p);
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
if (m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
{
GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil();
GSVector4 l = lrf.max(m_fscissor_x);
@ -331,7 +353,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
int pixels = right - left;
if(pixels > 0)
if (pixels > 0)
{
GSVertexSW dscan = dv / dv.p.xxxx();
@ -349,20 +371,20 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
int steps = dpi.v[i];
if(steps > 0)
if (steps > 0)
{
GSVertexSW edge = v0;
GSVertexSW dedge = dv / GSVector4(dp.v[i]);
GSVertexSW* RESTRICT e = m_edge.buff;
while(1)
while (1)
{
GSVector4i p(edge.p);
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
if (m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
if(IsOneOfMyScanlines(p.y))
if (IsOneOfMyScanlines(p.y))
{
AddScanline(e, 1, p.x, p.y, edge);
@ -370,7 +392,8 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
}
}
if(--steps == 0) break;
if (--steps == 0)
break;
edge += dedge;
}
@ -428,7 +451,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
// if(i == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2
if(m1 == 7) return; // y0 == y1 == y2
if (m1 == 7) // y0 == y1 == y2
return;
GSVector4 tbf = y0011.xzxz(y1221).ceil();
GSVector4 tbmax = tbf.max(m_fscissor_y);
@ -447,7 +471,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
int m2 = cross.upl(cross == GSVector4::zero()).mask();
if(m2 & 2) return;
if (m2 & 2)
return;
m2 &= 1;
@ -476,9 +501,9 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
dedge.p = dv[0].p * _dxy01c.zzzz().extract<0>() - dv[1].p * _dxy01c.xxxx().extract<0>();
dedge.tc = dv[0].tc * _dxy01c.zzzz() - dv[1].tc * _dxy01c.xxxx();
if(m1 & 1)
if (m1 & 1)
{
if(tb.y < tb.w)
if (tb.y < tb.w)
{
edge = _v[i[1 - m2]];
@ -490,7 +515,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
}
else
{
if(tb.x < tb.z)
if (tb.x < tb.z)
{
edge = v0;
@ -500,7 +525,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if(tb.y < tb.w)
if (tb.y < tb.w)
{
edge = v1;
@ -513,10 +538,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
Flush(vertex, index, (GSVertexSW&)dscan);
if(m_ds->HasEdge())
if (m_ds->HasEdge())
{
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
int orientation = a.mask();
@ -541,7 +566,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c
top = FindMyNextScanline(top);
while(top < bottom)
while (top < bottom)
{
GSVector8 dy(GSVector4(top) - p0.yyyy());
@ -559,7 +584,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c
int pixels = right - left;
if(pixels > 0)
if (pixels > 0)
{
scan.tc = edge.tc + dedge.tc * dy;
@ -573,7 +598,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c
top++;
if(!IsOneOfMyScanlines(top))
if (!IsOneOfMyScanlines(top))
{
top += (m_threads - 1) << m_thread_height;
}
@ -615,7 +640,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
// if(i == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2
if(m1 == 7) return; // y0 == y1 == y2
if (m1 == 7)
return; // y0 == y1 == y2
GSVector4 tbf = y0011.xzxz(y1221).ceil();
GSVector4 tbmax = tbf.max(m_fscissor_y);
@ -634,7 +660,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
int m2 = cross.upl(cross == GSVector4::zero()).mask();
if(m2 & 2) return;
if (m2 & 2)
return;
m2 &= 1;
@ -665,9 +692,9 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx();
dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx();
if(m1 & 1)
if (m1 & 1)
{
if(tb.y < tb.w)
if (tb.y < tb.w)
{
edge = vertex[i[1 - m2]];
@ -679,7 +706,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
}
else
{
if(tb.x < tb.z)
if (tb.x < tb.z)
{
edge = v0;
@ -689,7 +716,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if(tb.y < tb.w)
if (tb.y < tb.w)
{
edge = v1;
@ -702,10 +729,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
Flush(vertex, index, dscan);
if(m_ds->HasEdge())
if (m_ds->HasEdge())
{
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
int orientation = a.mask();
@ -730,7 +757,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
top = FindMyNextScanline(top);
while(top < bottom)
while (top < bottom)
{
GSVector4 dy = GSVector4(top) - p0.yyyy();
@ -748,7 +775,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
int pixels = right - left;
if(pixels > 0)
if (pixels > 0)
{
scan.t = edge.t + dedge.t * dy;
scan.c = edge.c + dedge.c * dy;
@ -764,7 +791,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
top++;
if(!IsOneOfMyScanlines(top))
if (!IsOneOfMyScanlines(top))
{
top += (m_threads - 1) << m_thread_height;
}
@ -795,13 +822,14 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
r = r.rintersect(m_scissor);
if(r.rempty()) return;
if (r.rempty())
return;
GSVertexSW scan = v[0];
if(m_ds->IsSolidRect())
if (m_ds->IsSolidRect())
{
if(m_threads == 1)
if (m_threads == 1)
{
m_ds->DrawRect(r, scan);
@ -815,7 +843,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
int top = FindMyNextScanline(r.top);
int bottom = r.bottom;
while(top < bottom)
while (top < bottom)
{
r.top = top;
r.bottom = std::min<int>((top + (1 << m_thread_height)) & ~((1 << m_thread_height) - 1), bottom);
@ -848,19 +876,20 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
int m = (prestep == GSVector4::zero()).mask();
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
if ((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if ((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
m_ds->SetupPrim(vertex, index, dscan);
while(1)
while (1)
{
if(IsOneOfMyScanlines(r.top))
if (IsOneOfMyScanlines(r.top))
{
DrawScanline(r.width(), r.left, r.top, scan);
}
if(++r.top >= r.bottom) break;
if (++r.top >= r.bottom)
break;
scan.t += dedge.t;
}
@ -881,7 +910,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
if(orientation)
if (orientation)
{
GSVector4 tbf = v0.p.yyyy(v1.p).ceil(); // t t b b
GSVector4 tbmax = tbf.max(m_fscissor_y); // max(t, st) max(t, sb) max(b, st) max(b, sb)
@ -892,12 +921,13 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
GSVertexSW edge, dedge;
if((dv.p >= GSVector4::zero()).mask() & 2)
if ((dv.p >= GSVector4::zero()).mask() & 2)
{
top = tb.extract32<0>(); // max(t, st)
top = tb.extract32<0>(); // max(t, st)
bottom = tb.extract32<3>(); // min(b, sb)
if(top >= bottom) return;
if (top >= bottom)
return;
edge = v0;
dedge = dv / dv.p.yyyy();
@ -906,10 +936,11 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
top = tb.extract32<1>(); // max(b, st)
top = tb.extract32<1>(); // max(b, st)
bottom = tb.extract32<2>(); // min(t, sb)
if(top >= bottom) return;
if (top >= bottom)
return;
edge = v1;
dedge = dv / dv.p.yyyy();
@ -922,14 +953,14 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
int x = p.extract32<0>();
int dx = p.extract32<1>();
if(side)
if (side)
{
while(1)
while (1)
{
int xi = x >> 16;
int xf = x & 0xffff;
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top))
if (m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top))
{
AddScanline(e, 1, xi, top, edge);
@ -938,7 +969,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++;
}
if(++top >= bottom) break;
if (++top >= bottom)
break;
edge += dedge;
x += dx;
@ -946,12 +978,12 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
while(1)
while (1)
{
int xi = (x >> 16) + 1;
int xf = x & 0xffff;
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top))
if (m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top))
{
AddScanline(e, 1, xi, top, edge);
@ -960,7 +992,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++;
}
if(++top >= bottom) break;
if (++top >= bottom)
break;
edge += dedge;
x += dx;
@ -978,12 +1011,13 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
GSVertexSW edge, dedge;
if((dv.p >= GSVector4::zero()).mask() & 1)
if ((dv.p >= GSVector4::zero()).mask() & 1)
{
left = lr.extract32<0>(); // max(l, sl)
left = lr.extract32<0>(); // max(l, sl)
right = lr.extract32<3>(); // min(r, sr)
if(left >= right) return;
if (left >= right)
return;
edge = v0;
dedge = dv / dv.p.xxxx();
@ -992,10 +1026,11 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
left = lr.extract32<1>(); // max(r, sl)
left = lr.extract32<1>(); // max(r, sl)
right = lr.extract32<2>(); // min(l, sr)
if(left >= right) return;
if (left >= right)
return;
edge = v1;
dedge = dv / dv.p.xxxx();
@ -1008,14 +1043,14 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
int y = p.extract32<2>();
int dy = p.extract32<3>();
if(side)
if (side)
{
while(1)
while (1)
{
int yi = y >> 16;
int yf = y & 0xffff;
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
if (m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{
AddScanline(e, 1, left, yi, edge);
@ -1024,7 +1059,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++;
}
if(++left >= right) break;
if (++left >= right)
break;
edge += dedge;
y += dy;
@ -1032,12 +1068,12 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
while(1)
while (1)
{
int yi = (y >> 16) + 1;
int yf = y & 0xffff;
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
if (m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{
AddScanline(e, 1, left, yi, edge);
@ -1046,7 +1082,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++;
}
if(++left >= right) break;
if (++left >= right)
break;
edge += dedge;
y += dy;
@ -1072,14 +1109,14 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
int count = m_edge.count;
if(count > 0)
if (count > 0)
{
m_ds->SetupPrim(vertex, index, dscan);
const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count;
if(!edge)
if (!edge)
{
do
{
@ -1088,8 +1125,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
int top = e->_pad.i32[2];
DrawScanline(pixels, left, top, *e++);
}
while(e < ee);
} while (e < ee);
}
else
{
@ -1100,8 +1136,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
int top = e->_pad.i32[2];
DrawEdge(pixels, left, top, *e++);
}
while(e < ee);
} while (e < ee);
}
m_edge.count = 0;
@ -1147,9 +1182,9 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
int row = 0;
while(row < rows)
while (row < rows)
{
for(int i = 0; i < threads; i++, row++)
for (int i = 0; i < threads; i++, row++)
{
m_scanline[row] = (uint8)i;
}
@ -1170,7 +1205,7 @@ void GSRasterizerList::Queue(const std::shared_ptr<GSRasterizerData>& data)
int top = r.top >> m_thread_height;
int bottom = std::min<int>((r.bottom + (1 << m_thread_height) - 1) >> m_thread_height, top + m_workers.size());
while(top < bottom)
while (top < bottom)
{
m_workers[m_scanline[top++]]->Push(data);
}
@ -1178,9 +1213,9 @@ void GSRasterizerList::Queue(const std::shared_ptr<GSRasterizerData>& data)
void GSRasterizerList::Sync()
{
if(!IsSynced())
if (!IsSynced())
{
for(size_t i = 0; i < m_workers.size(); i++)
for (size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
}
@ -1191,9 +1226,9 @@ void GSRasterizerList::Sync()
bool GSRasterizerList::IsSynced() const
{
for(size_t i = 0; i < m_workers.size(); i++)
for (size_t i = 0; i < m_workers.size(); i++)
{
if(!m_workers[i]->IsEmpty())
if (!m_workers[i]->IsEmpty())
{
return false;
}
@ -1206,7 +1241,7 @@ int GSRasterizerList::GetPixels(bool reset)
{
int pixels = 0;
for(size_t i = 0; i < m_workers.size(); i++)
for (size_t i = 0; i < m_workers.size(); i++)
{
pixels += m_r[i]->GetPixels(reset);
}

View File

@ -64,7 +64,8 @@ public:
virtual ~GSRasterizerData()
{
if(buff != NULL) _aligned_free(buff);
if (buff != NULL)
_aligned_free(buff);
}
};
@ -72,7 +73,7 @@ class IDrawScanline : public GSAlignedClass<32>
{
public:
typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
typedef void(__fastcall* DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
protected:
@ -82,7 +83,13 @@ protected:
DrawRectPtr m_dr;
public:
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
IDrawScanline()
: m_sp(NULL)
, m_ds(NULL)
, m_de(NULL)
, m_dr(NULL)
{
}
virtual ~IDrawScanline() {}
virtual void BeginDraw(const GSRasterizerData* data) = 0;
@ -90,10 +97,10 @@ public:
#ifdef ENABLE_JIT_RASTERIZER
__forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) {m_sp(vertex, index, dscan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
__forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) { m_sp(vertex, index, dscan); }
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) { m_ds(pixels, left, top, scan); }
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) { m_de(pixels, left, top, scan); }
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) { (this->*m_dr)(r, v); }
#else
@ -106,8 +113,8 @@ public:
virtual void PrintStats() = 0;
__forceinline bool HasEdge() const {return m_de != NULL;}
__forceinline bool IsSolidRect() const {return m_dr != NULL;}
__forceinline bool HasEdge() const { return m_de != NULL; }
__forceinline bool IsSolidRect() const { return m_dr != NULL; }
};
class IRasterizer : public GSAlignedClass<32>
@ -134,22 +141,22 @@ protected:
GSVector4i m_scissor;
GSVector4 m_fscissor_x;
GSVector4 m_fscissor_y;
struct {GSVertexSW* buff; int count;} m_edge;
struct {int sum, actual, total;} m_pixels;
struct { GSVertexSW* buff; int count; } m_edge;
struct { int sum, actual, total; } m_pixels;
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
template<bool scissor_test>
template <bool scissor_test>
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count);
void DrawLine(const GSVertexSW* vertex, const uint32* index);
void DrawTriangle(const GSVertexSW* vertex, const uint32* index);
void DrawSprite(const GSVertexSW* vertex, const uint32* index);
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, const GSVertexSW2& dedge, const GSVertexSW2& dscan, const GSVector4& p0);
#else
#else
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0);
#endif
#endif
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
@ -173,9 +180,9 @@ public:
void Queue(const std::shared_ptr<GSRasterizerData>& data);
void Sync() {}
bool IsSynced() const {return true;}
bool IsSynced() const { return true; }
int GetPixels(bool reset);
void PrintStats() {m_ds->PrintStats();}
void PrintStats() { m_ds->PrintStats(); }
};
class GSRasterizerList : public IRasterizer
@ -195,23 +202,24 @@ protected:
public:
virtual ~GSRasterizerList();
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon)
template <class DS>
static IRasterizer* Create(int threads, GSPerfMon* perfmon)
{
threads = std::max<int>(threads, 0);
if(threads == 0)
if (threads == 0)
{
return new GSRasterizer(new DS(), 0, 1, perfmon);
}
GSRasterizerList* rl = new GSRasterizerList(threads, perfmon);
for(int i = 0; i < threads; i++)
for (int i = 0; i < threads; i++)
{
rl->m_r.push_back(std::unique_ptr<GSRasterizer>(new GSRasterizer(new DS(), i, threads, perfmon)));
auto &r = *rl->m_r[i];
auto& r = *rl->m_r[i];
rl->m_workers.push_back(std::unique_ptr<GSWorker>(new GSWorker(
[&r](std::shared_ptr<GSRasterizerData> &item) { r.Draw(item.get()); })));
[&r](std::shared_ptr<GSRasterizerData>& item) { r.Draw(item.get()); })));
}
return rl;

File diff suppressed because it is too large Load Diff

View File

@ -47,7 +47,12 @@ class GSRendererSW : public GSRenderer
int m_zpsm;
bool m_using_pages;
TextureLevel m_tex[7 + 1]; // NULL terminated
enum {SyncNone, SyncSource, SyncTarget} m_syncpoint;
enum
{
SyncNone,
SyncSource,
SyncTarget
} m_syncpoint;
public:
SharedData(GSRendererSW* parent);
@ -64,7 +69,7 @@ class GSRendererSW : public GSRenderer
ConvertVertexBufferPtr m_cvb[4][2][2][2];
template<uint32 primclass, uint32 tme, uint32 fst, uint32 q_div>
template <uint32 primclass, uint32 tme, uint32 fst, uint32 q_div>
void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
protected:

View File

@ -28,61 +28,61 @@ union GSScanlineSelector
{
struct
{
uint32 fpsm:2; // 0
uint32 zpsm:2; // 2
uint32 ztst:2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
uint32 atst:3; // 6
uint32 afail:2; // 9
uint32 iip:1; // 11
uint32 tfx:3; // 12
uint32 tcc:1; // 15
uint32 fst:1; // 16
uint32 ltf:1; // 17
uint32 tlu:1; // 18
uint32 fge:1; // 19
uint32 date:1; // 20
uint32 abe:1; // 21
uint32 aba:2; // 22
uint32 abb:2; // 24
uint32 abc:2; // 26
uint32 abd:2; // 28
uint32 pabe:1; // 30
uint32 aa1:1; // 31
uint32 fpsm : 2; // 0
uint32 zpsm : 2; // 2
uint32 ztst : 2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
uint32 atst : 3; // 6
uint32 afail : 2; // 9
uint32 iip : 1; // 11
uint32 tfx : 3; // 12
uint32 tcc : 1; // 15
uint32 fst : 1; // 16
uint32 ltf : 1; // 17
uint32 tlu : 1; // 18
uint32 fge : 1; // 19
uint32 date : 1; // 20
uint32 abe : 1; // 21
uint32 aba : 2; // 22
uint32 abb : 2; // 24
uint32 abc : 2; // 26
uint32 abd : 2; // 28
uint32 pabe : 1; // 30
uint32 aa1 : 1; // 31
uint32 fwrite:1; // 32
uint32 ftest:1; // 33
uint32 rfb:1; // 34
uint32 zwrite:1; // 35
uint32 ztest:1; // 36
uint32 zoverflow:1; // 37 (z max >= 0x80000000)
uint32 zclamp:1; // 38
uint32 wms:2; // 39
uint32 wmt:2; // 41
uint32 datm:1; // 43
uint32 colclamp:1; // 44
uint32 fba:1; // 45
uint32 dthe:1; // 46
uint32 prim:2; // 47
uint32 fwrite : 1; // 32
uint32 ftest : 1; // 33
uint32 rfb : 1; // 34
uint32 zwrite : 1; // 35
uint32 ztest : 1; // 36
uint32 zoverflow : 1; // 37 (z max >= 0x80000000)
uint32 zclamp : 1; // 38
uint32 wms : 2; // 39
uint32 wmt : 2; // 41
uint32 datm : 1; // 43
uint32 colclamp : 1; // 44
uint32 fba : 1; // 45
uint32 dthe : 1; // 46
uint32 prim : 2; // 47
uint32 edge:1; // 49
uint32 tw:3; // 50 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 lcm:1; // 53
uint32 mmin:2; // 54
uint32 notest:1; // 55 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
uint32 edge : 1; // 49
uint32 tw : 3; // 50 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 lcm : 1; // 53
uint32 mmin : 2; // 54
uint32 notest : 1; // 55 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
// TODO: 1D texture flag? could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction
uint32 breakpoint:1; // Insert a trap to stop the program, helpful to stop debugger on a program
uint32 breakpoint : 1; // Insert a trap to stop the program, helpful to stop debugger on a program
};
struct
{
uint32 _pad1:22;
uint32 ababcd:8;
uint32 _pad2:2;
uint32 _pad1 : 22;
uint32 ababcd : 8;
uint32 _pad2 : 2;
uint32 fb:2;
uint32 _pad3:1;
uint32 zb:2;
uint32 fb : 2;
uint32 _pad3 : 1;
uint32 zb : 2;
};
struct
@ -94,33 +94,29 @@ union GSScanlineSelector
uint64 key;
// Defaulted default constructor.
GSScanlineSelector() = default;
// Builds a selector from a raw 64-bit value by storing it in `key`.
// NOTE(review): the constructor and both conversion operators each appear twice in this
// listing (compact and expanded layout); the two spellings are token-for-token equivalent.
GSScanlineSelector(uint64 k) : key(k) {}
GSScanlineSelector(uint64 k)
: key(k)
{
}
// Implicit conversions: to uint32 yields `lo`, to uint64 yields `key`.
operator uint32() const {return lo;}
operator uint64() const {return key;}
operator uint32() const { return lo; }
operator uint64() const { return key; }
// Returns true only when all of the following hold for this selector:
//   - prim is GS_SPRITE_CLASS
//   - tfx is TFX_NONE
//   - the iip, abe, date and fge bits are all 0
//   - ztst and atst are each 0 or 1
// NOTE(review): the condition appears twice in this listing (multi-line and single-line
// layout). Both test the same fields in the same order; only the first return can execute.
bool IsSolidRect() const
{
return prim == GS_SPRITE_CLASS
&& iip == 0
&& tfx == TFX_NONE
&& abe == 0
&& ztst <= 1
&& atst <= 1
&& date == 0
&& fge == 0;
return prim == GS_SPRITE_CLASS && iip == 0 && tfx == TFX_NONE && abe == 0 && ztst <= 1 && atst <= 1 && date == 0 && fge == 0;
}
// Writes the selector's fields to stderr as a single line of "name:value" pairs.
// In the output, zwrite is labelled "zw" and colclamp is labelled "cclamp"; the four
// values aba, abb, abc and abd are printed back-to-back with no separator after "abe:".
// NOTE(review): the format-string continuation lines and the argument lines appear twice
// in this listing (before and after re-indentation); they differ only in whitespace.
void Print() const
{
fprintf(stderr, "fpsm:%d zpsm:%d ztst:%d ztest:%d atst:%d afail:%d iip:%d rfb:%d fb:%d zb:%d zw:%d "
"tfx:%d tcc:%d fst:%d ltf:%d tlu:%d wms:%d wmt:%d mmin:%d lcm:%d tw:%d "
"fba:%d cclamp:%d date:%d datm:%d "
"prim:%d abe:%d %d%d%d%d fge:%d dthe:%d notest:%d\n",
fpsm, zpsm, ztst, ztest, atst, afail, iip, rfb, fb, zb, zwrite,
tfx, tcc, fst, ltf, tlu, wms, wmt, mmin, lcm, tw,
fba, colclamp, date, datm,
prim, abe, aba, abb, abc, abd , fge, dthe, notest);
"tfx:%d tcc:%d fst:%d ltf:%d tlu:%d wms:%d wmt:%d mmin:%d lcm:%d tw:%d "
"fba:%d cclamp:%d date:%d datm:%d "
"prim:%d abe:%d %d%d%d%d fge:%d dthe:%d notest:%d\n",
fpsm, zpsm, ztst, ztest, atst, afail, iip, rfb, fb, zb, zwrite,
tfx, tcc, fst, ltf, tlu, wms, wmt, mmin, lcm, tw,
fba, colclamp, date, datm,
prim, abe, aba, abb, abc, abd, fge, dthe, notest);
}
};
@ -146,37 +142,37 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p
GSVector4i aref;
GSVector4i afix;
struct {GSVector4i min, max, minmax, mask, invmask;} t; // [u] x 4 [v] x 4
struct { GSVector4i min, max, minmax, mask, invmask; } t; // [u] x 4 [v] x 4
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
uint32 fm, zm;
uint32 frb, fga;
GSVector8 mxl;
GSVector8 k; // TEX1.K * 0x10000
GSVector8 l; // TEX1.L * -0x10000
struct {GSVector8i i, f;} lod; // lcm == 1
struct { GSVector8i i, f; } lod; // lcm == 1
#else
#else
GSVector4i fm, zm;
GSVector4i frb, fga;
GSVector4 mxl;
GSVector4 k; // TEX1.K * 0x10000
GSVector4 l; // TEX1.L * -0x10000
struct {GSVector4i i, f;} lod; // lcm == 1
struct { GSVector4i i, f; } lod; // lcm == 1
#endif
#endif
};
struct alignas(32) GSScanlineLocalData // per prim variables, each thread has its own
{
#if _M_SSE >= 0x501
#if _M_SSE >= 0x501
struct skip {GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad;} d[8];
struct step {GSVector4 stq; struct {uint32 rb, ga;} c; struct {uint32 z, f;} p;} d8;
struct {GSVector8i rb, ga;} c;
struct {uint32 z, f;} p;
struct skip { GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad; } d[8];
struct step { GSVector4 stq; struct { uint32 rb, ga; } c; struct { uint32 z, f; } p; } d8;
struct { GSVector8i rb, ga; } c;
struct { uint32 z, f; } p;
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
@ -192,19 +188,19 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
// mipmapping
struct {GSVector8i i, f;} lod;
struct { GSVector8i i, f; } lod;
GSVector8i uv[2];
GSVector8i uv_minmax[2];
GSVector8i trb, tga;
GSVector8i test;
} temp;
#else
#else
struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4];
struct step {GSVector4 z, stq; GSVector4i c, f;} d4;
struct {GSVector4i rb, ga;} c;
struct {GSVector4i z, f;} p;
struct skip { GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad; } d[4];
struct step { GSVector4 z, stq; GSVector4i c, f; } d4;
struct { GSVector4i rb, ga; } c;
struct { GSVector4i z, f; } p;
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
@ -220,14 +216,14 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
// mipmapping
struct {GSVector4i i, f;} lod;
struct { GSVector4i i, f; } lod;
GSVector4i uv[2];
GSVector4i uv_minmax[2];
GSVector4i trb, tga;
GSVector4i test;
} temp;
#endif
#endif
//
@ -277,14 +273,14 @@ struct GSScanlineConstantData : public GSAlignedClass<32>
};
uint32 I_hate_vs2013_m_test_128b[8][4] = {
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000},
{ 0xffffffff, 0x00000000, 0x00000000, 0x00000000},
{ 0xffffffff, 0xffffffff, 0x00000000, 0x00000000},
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
{ 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff},
{ 0x00000000, 0x00000000, 0xffffffff, 0xffffffff},
{ 0x00000000, 0x00000000, 0x00000000, 0xffffffff},
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000}
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
{0xffffffff, 0x00000000, 0x00000000, 0x00000000},
{0xffffffff, 0xffffffff, 0x00000000, 0x00000000},
{0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
{0x00000000, 0xffffffff, 0xffffffff, 0xffffffff},
{0x00000000, 0x00000000, 0xffffffff, 0xffffffff},
{0x00000000, 0x00000000, 0x00000000, 0xffffffff},
{0x00000000, 0x00000000, 0x00000000, 0x00000000}
};
float I_hate_vs2013_m_shift_256b[9][8] = {
@ -319,14 +315,15 @@ struct GSScanlineConstantData : public GSAlignedClass<32>
1.0f
};
// For every entry n of log2_coef, copy the scalar log2_coef[n] into columns 0..3 of
// m_log2_coef_128b[n] and into columns 0..7 of m_log2_coef_256b[n] (i and i + 4).
// NOTE(review): both loop headers and the i + 4 store appear twice in this listing
// (old and new brace/spacing style); the statements are otherwise identical.
for (size_t n = 0; n < countof(log2_coef); ++n) {
for (size_t i = 0; i < 4; ++i) {
for (size_t n = 0; n < countof(log2_coef); ++n)
{
for (size_t i = 0; i < 4; ++i)
{
m_log2_coef_128b[n][i] = log2_coef[n];
m_log2_coef_256b[n][i] = log2_coef[n];
m_log2_coef_256b[n][i+4] = log2_coef[n];
m_log2_coef_256b[n][i + 4] = log2_coef[n];
}
}
}
};

View File

@ -36,16 +36,19 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void
// m_en.t is set when m_sel.fb is nonzero and tfx is anything other than TFX_NONE.
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
// m_en.c is set when m_sel.fb is nonzero, except when tfx is TFX_DECAL with tcc set.
m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0;
// Run the code generator: Generate_AVX2 when built with _M_SSE >= 0x501; otherwise
// Generate_AVX if m_cpu reports tAVX at run time, else Generate_SSE.
// A std::exception thrown while generating is reported on stderr and not rethrown.
// NOTE(review): the try/if/catch lines appear twice in this listing (old and new brace
// placement); the logic is the same in both spellings.
try {
try
{
#if _M_SSE >= 0x501
Generate_AVX2();
#else
if(m_cpu.has(util::Cpu::tAVX))
if (m_cpu.has(util::Cpu::tAVX))
Generate_AVX();
else
Generate_SSE();
#endif
} catch (std::exception& e) {
}
catch (std::exception& e)
{
fprintf(stderr, "ERR:GSSetupPrimCodeGenerator %s\n", e.what());
}
}

View File

@ -27,13 +27,16 @@
class GSSetupPrimCodeGenerator : public GSCodeGenerator
{
void operator = (const GSSetupPrimCodeGenerator&);
void operator=(const GSSetupPrimCodeGenerator&);
GSScanlineSelector m_sel;
GSScanlineLocalData& m_local;
bool m_rip;
struct {uint32 z:1, f:1, t:1, c:1;} m_en;
struct
{
uint32 z : 1, f : 1, t : 1, c : 1;
} m_en;
#if _M_SSE < 0x501
void Generate_SSE();

View File

@ -46,11 +46,11 @@ void GSSetupPrimCodeGenerator::Generate_AVX()
if (!m_rip)
mov(t0, (size_t)&m_local);
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{
mov(rax, (size_t)g_const->m_shift_128b);
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
for (int i = 0; i < (m_sel.notest ? 2 : 5); i++)
{
vmovaps(Xmm(3 + i), ptr[rax + i * 16]);
}
@ -74,18 +74,18 @@ void GSSetupPrimCodeGenerator::Generate_AVX()
void GSSetupPrimCodeGenerator::Depth_AVX()
{
if(!m_en.z && !m_en.f)
if (!m_en.z && !m_en.f)
{
return;
}
if(m_sel.prim != GS_SPRITE_CLASS)
if (m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4 p = dscan.p;
vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, p)]);
if(m_en.f)
if (m_en.f)
{
// GSVector4 df = p.wwww();
@ -99,7 +99,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vmovdqa(_rip_local(d4.f), xmm2);
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
{
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
@ -113,7 +113,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
}
}
if(m_en.z)
if (m_en.z)
{
// GSVector4 dz = p.zzzz();
@ -124,7 +124,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
vmulps(xmm1, xmm0, xmm3);
vmovdqa(_rip_local(d4.z), xmm1);
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
{
// m_local.d[i].z = dz * m_shift[i];
@ -143,7 +143,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
shl(eax, 6); // * sizeof(GSVertexSW)
add(rax, a0);
if(m_en.f)
if (m_en.f)
{
// m_local.p.f = GSVector4i(p).zzzzh().zzzz();
vmovaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]);
@ -154,7 +154,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
vmovdqa(_rip_local(p.f), xmm1);
}
if(m_en.z)
if (m_en.z)
{
// uint32 z is bypassed in t.w
@ -167,7 +167,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
void GSSetupPrimCodeGenerator::Texture_AVX()
{
if(!m_en.t)
if (!m_en.t)
{
return;
}
@ -178,7 +178,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
vmulps(xmm1, xmm0, xmm3);
if(m_sel.fst)
if (m_sel.fst)
{
// m_local.d4.stq = GSVector4i(t * 4.0f);
@ -193,7 +193,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
vmovaps(_rip_local(d4.stq), xmm1);
}
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
{
// GSVector4 ds = t.xxxx();
// GSVector4 dt = t.yyyy();
@ -201,13 +201,13 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j));
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
{
// GSVector4 v = ds/dt * m_shift[i];
vmulps(xmm2, xmm1, Xmm(4 + i));
if(m_sel.fst)
if (m_sel.fst)
{
// m_local.d[i].s/t = GSVector4i(v);
@ -216,10 +216,10 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0]));
const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
switch(j)
switch (j)
{
case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
}
}
else
@ -230,11 +230,11 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0]));
const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0]));
switch(j)
switch (j)
{
case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), xmm2); break;
case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), xmm2); break;
}
}
}
@ -243,12 +243,12 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
void GSSetupPrimCodeGenerator::Color_AVX()
{
if(!m_en.c)
if (!m_en.c)
{
return;
}
if(m_sel.iip)
if (m_sel.iip)
{
// GSVector4 c = dscan.c;
@ -270,7 +270,7 @@ void GSSetupPrimCodeGenerator::Color_AVX()
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
{
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
@ -302,7 +302,7 @@ void GSSetupPrimCodeGenerator::Color_AVX()
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
{
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
@ -330,15 +330,15 @@ void GSSetupPrimCodeGenerator::Color_AVX()
// Choose `last` from the primitive class: 0 for points, 1 for lines and sprites,
// 2 for triangles. Any m_sel.prim value not listed leaves it at its initial 0.
// presumably `last` is the position of the primitive's final vertex index — confirm at the use site
// NOTE(review): the switch header and three of the case lines appear twice in this listing
// (before and after alignment changes); the assignments are identical.
int last = 0;
switch(m_sel.prim)
switch (m_sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
case GS_SPRITE_CLASS: last = 1; break;
}
if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(eax, ptr[a1 + sizeof(uint32) * last]);
shl(eax, 6); // * sizeof(GSVertexSW)
@ -354,7 +354,7 @@ void GSSetupPrimCodeGenerator::Color_AVX()
// if(!tme) c = c.srl16(7);
if(m_sel.tfx == TFX_NONE)
if (m_sel.tfx == TFX_NONE)
{
vpsrlw(xmm0, 7);
}

Some files were not shown because too many files have changed in this diff Show More