GSdx: Format

This commit is contained in:
TellowKrinkle 2021-04-12 04:31:30 -05:00 committed by tellowkrinkle
parent fafbb3cc63
commit ae1bc651d6
128 changed files with 11631 additions and 9678 deletions

View File

@ -161,7 +161,8 @@ EXPORT_C GSclose()
{ {
gsopen_done = false; gsopen_done = false;
if(s_gs == NULL) return; if (s_gs == NULL)
return;
s_gs->ResetDevice(); s_gs->ResetDevice();
@ -223,7 +224,8 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
#if defined(__unix__) #if defined(__unix__)
// Note: EGL code use GLX otherwise maybe it could be also compatible with Windows // Note: EGL code use GLX otherwise maybe it could be also compatible with Windows
// Yes OpenGL code isn't complicated enough ! // Yes OpenGL code isn't complicated enough !
switch (GSWndEGL::SelectPlatform()) { switch (GSWndEGL::SelectPlatform())
{
#if GS_EGL_X11 #if GS_EGL_X11
case EGL_PLATFORM_X11_KHR: case EGL_PLATFORM_X11_KHR:
wnds.push_back(std::make_shared<GSWndEGL_X11>()); wnds.push_back(std::make_shared<GSWndEGL_X11>());
@ -389,7 +391,8 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
return -1; return -1;
} }
if (renderer == GSRendererType::OGL_HW && theApp.GetConfigI("debug_glsl_shader") == 2) { if (renderer == GSRendererType::OGL_HW && theApp.GetConfigI("debug_glsl_shader") == 2)
{
printf("GSdx: test OpenGL shader. Please wait...\n\n"); printf("GSdx: test OpenGL shader. Please wait...\n\n");
static_cast<GSDeviceOGL*>(s_gs->m_dev)->SelfShaderTest(); static_cast<GSDeviceOGL*>(s_gs->m_dev)->SelfShaderTest();
printf("\nGSdx: test OpenGL shader done. It will now exit\n"); printf("\nGSdx: test OpenGL shader done. It will now exit\n");
@ -401,12 +404,14 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t
EXPORT_C_(void) GSosdLog(const char* utf8, uint32 color) EXPORT_C_(void) GSosdLog(const char* utf8, uint32 color)
{ {
if(s_gs && s_gs->m_dev) s_gs->m_dev->m_osd.Log(utf8); if (s_gs && s_gs->m_dev)
s_gs->m_dev->m_osd.Log(utf8);
} }
EXPORT_C_(void) GSosdMonitor(const char* key, const char* value, uint32 color) EXPORT_C_(void) GSosdMonitor(const char* key, const char* value, uint32 color)
{ {
if(s_gs && s_gs->m_dev) s_gs->m_dev->m_osd.Monitor(key, value); if (s_gs && s_gs->m_dev)
s_gs->m_dev->m_osd.Monitor(key, value);
} }
EXPORT_C_(int) GSopen2(void** dsp, uint32 flags) EXPORT_C_(int) GSopen2(void** dsp, uint32 flags)
@ -430,9 +435,7 @@ EXPORT_C_(int) GSopen2(void** dsp, uint32 flags)
case GSRendererType::OGL_SW: case GSRendererType::OGL_SW:
#ifdef _WIN32 #ifdef _WIN32
{ {
const auto config_renderer = static_cast<GSRendererType>( const auto config_renderer = static_cast<GSRendererType>(theApp.GetConfigI("Renderer"));
theApp.GetConfigI("Renderer")
);
if (current_renderer == config_renderer) if (current_renderer == config_renderer)
current_renderer = GSUtil::GetBestRenderer(); current_renderer = GSUtil::GetBestRenderer();
@ -739,7 +742,8 @@ EXPORT_C GSconfigure()
{ {
try try
{ {
if(!GSUtil::CheckSSE()) return; if (!GSUtil::CheckSSE())
return;
theApp.Init(); theApp.Init();
@ -759,7 +763,8 @@ EXPORT_C GSconfigure()
// We can convince it that touching that pool would be unsafe by running all GTK calls within a CFRunLoop // We can convince it that touching that pool would be unsafe by running all GTK calls within a CFRunLoop
// (Blocks submitted to the main queue by dispatch_async are run by its CFRunLoop) // (Blocks submitted to the main queue by dispatch_async are run by its CFRunLoop)
dispatch_async(dispatch_get_main_queue(), ^{ dispatch_async(dispatch_get_main_queue(), ^{
if (RunLinuxDialog()) { if (RunLinuxDialog())
{
theApp.ReloadConfig(); theApp.ReloadConfig();
// Force a reload of the gs state // Force a reload of the gs state
theApp.SetCurrentRendererType(GSRendererType::Undefined); theApp.SetCurrentRendererType(GSRendererType::Undefined);
@ -767,15 +772,16 @@ EXPORT_C GSconfigure()
}); });
#else #else
if (RunLinuxDialog()) { if (RunLinuxDialog())
{
theApp.ReloadConfig(); theApp.ReloadConfig();
// Force a reload of the gs state // Force a reload of the gs state
theApp.SetCurrentRendererType(GSRendererType::Undefined); theApp.SetCurrentRendererType(GSRendererType::Undefined);
} }
#endif #endif
}
} catch (GSDXRecoverableError) catch (GSDXRecoverableError)
{ {
} }
} }
@ -802,7 +808,8 @@ EXPORT_C GSirqCallback(void (*irq)())
} }
} }
void pt(const char* str){ void pt(const char* str)
{
struct tm* current; struct tm* current;
time_t now; time_t now;
@ -814,12 +821,14 @@ void pt(const char* str){
EXPORT_C_(bool) GSsetupRecording(std::string& filename) EXPORT_C_(bool) GSsetupRecording(std::string& filename)
{ {
if (s_gs == NULL) { if (s_gs == NULL)
{
printf("GSdx: no s_gs for recording\n"); printf("GSdx: no s_gs for recording\n");
return false; return false;
} }
#if defined(__unix__) || defined(__APPLE__) #if defined(__unix__) || defined(__APPLE__)
if (!theApp.GetConfigB("capture_enabled")) { if (!theApp.GetConfigB("capture_enabled"))
{
printf("GSdx: Recording is disabled\n"); printf("GSdx: Recording is disabled\n");
return false; return false;
} }
@ -914,7 +923,8 @@ public:
: m_console(NULL) : m_console(NULL)
, m_title(title) , m_title(title)
{ {
if(open) Open(); if (open)
Open();
} }
Console::~Console() Console::~Console()
@ -983,10 +993,15 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
char* start = lpszCmdLine; char* start = lpszCmdLine;
char* end = NULL; char* end = NULL;
long n = strtol(lpszCmdLine, &end, 10); long n = strtol(lpszCmdLine, &end, 10);
if(end > start) {renderer = static_cast<GSRendererType>(n); lpszCmdLine = end;} if (end > start)
{
renderer = static_cast<GSRendererType>(n);
lpszCmdLine = end;
}
} }
while(*lpszCmdLine == ' ') lpszCmdLine++; while (*lpszCmdLine == ' ')
lpszCmdLine++;
::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS); ::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
@ -1027,17 +1042,24 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
GSvsync(1); GSvsync(1);
struct Packet {uint8 type, param; uint32 size, addr; std::vector<uint8> buff;}; struct Packet
{
uint8 type, param;
uint32 size, addr;
std::vector<uint8> buff;
};
auto read_packet = [&file](uint8 type) { auto read_packet = [&file](uint8 type) {
Packet p; Packet p;
p.type = type; p.type = type;
switch(p.type) { switch (p.type)
{
case 0: case 0:
file->Read(&p.param, 1); file->Read(&p.param, 1);
file->Read(&p.size, 4); file->Read(&p.size, 4);
switch(p.param) { switch (p.param)
{
case 0: case 0:
p.buff.resize(0x4000); p.buff.resize(0x4000);
p.addr = 0x4000 - p.size; p.addr = 0x4000 - p.size;
@ -1138,7 +1160,8 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32); uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i; for (int i = 0; i < 1024 * 1024 * 4; i++)
ptr[i] = (uint8)i;
// //
@ -1275,7 +1298,8 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32); uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i; for (int i = 0; i < 1024 * 1024 * 4; i++)
ptr[i] = (uint8)i;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32];
@ -1344,7 +1368,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
return; return;
} }
struct Packet {uint8 type, param; uint32 size, addr; std::vector<uint8> buff;}; struct Packet
{
uint8 type, param;
uint32 size, addr;
std::vector<uint8> buff;
};
std::list<Packet*> packets; std::list<Packet*> packets;
std::vector<uint8> buff; std::vector<uint8> buff;
@ -1356,7 +1385,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
int finished = theApp.GetConfigI("linux_replay"); int finished = theApp.GetConfigI("linux_replay");
bool repack_dump = (finished < 0); bool repack_dump = (finished < 0);
if (theApp.GetConfigI("dump")) { if (theApp.GetConfigI("dump"))
{
fprintf(stderr, "Dump is enabled. Replay will be disabled\n"); fprintf(stderr, "Dump is enabled. Replay will be disabled\n");
finished = 1; finished = 1;
} }
@ -1365,11 +1395,13 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
void* hWnd = NULL; void* hWnd = NULL;
int err = _GSopen((void**)&hWnd, "", m_renderer); int err = _GSopen((void**)&hWnd, "", m_renderer);
if (err != 0) { if (err != 0)
{
fprintf(stderr, "Error failed to GSopen\n"); fprintf(stderr, "Error failed to GSopen\n");
return; return;
} }
if (s_gs->m_wnd == NULL) return; if (s_gs->m_wnd == NULL)
return;
{ // Read .gs content { // Read .gs content
std::string f(lpszCmdLine); std::string f(lpszCmdLine);
@ -1492,7 +1524,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
case 2: case 2:
if(buff.size() < p->size) buff.resize(p->size); if (buff.size() < p->size)
buff.resize(p->size);
GSreadFIFO2(&buff[0], p->size / 16); GSreadFIFO2(&buff[0], p->size / 16);
@ -1506,11 +1539,16 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
} }
} }
if (finished >= 200) { if (finished >= 200)
{
; // Nop for Nvidia Profiler ; // Nop for Nvidia Profiler
} else if (finished > 90) { }
else if (finished > 90)
{
sleep(1); sleep(1);
} else { }
else
{
finished--; finished--;
} }
} }
@ -1522,8 +1560,7 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
fprintf(stderr, "memory bandwith. T: %f KB/f. V: %f KB/f. U: %f KB/f\n", fprintf(stderr, "memory bandwith. T: %f KB/f. V: %f KB/f. U: %f KB/f\n",
(float)g_real_texture_upload_byte / (float)total_frame_nb, (float)g_real_texture_upload_byte / (float)total_frame_nb,
(float)g_vertex_upload_byte / (float)total_frame_nb, (float)g_vertex_upload_byte / (float)total_frame_nb,
(float)g_uniform_upload_byte/(float)total_frame_nb (float)g_uniform_upload_byte / (float)total_frame_nb);
);
#endif #endif
for (auto i = packets.begin(); i != packets.end(); i++) for (auto i = packets.begin(); i != packets.end(); i++)

View File

@ -152,7 +152,7 @@ enum GIF_FLG
GIF_FLG_PACKED = 0, GIF_FLG_PACKED = 0,
GIF_FLG_REGLIST = 1, GIF_FLG_REGLIST = 1,
GIF_FLG_IMAGE = 2, GIF_FLG_IMAGE = 2,
GIF_FLG_IMAGE2 = 3 GIF_FLG_IMAGE2 = 3,
}; };
enum GS_PSM enum GS_PSM
@ -263,14 +263,14 @@ union name \
bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \ bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \
bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \ bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \
operator GSVector4i() const {return GSVector4i::loadl(this);} \ operator GSVector4i() const {return GSVector4i::loadl(this);} \
struct { \ struct {
#define REG128(name) \ #define REG128(name) \
union name \ union name \
{ \ { \
uint64 u64[2]; \ uint64 u64[2]; \
uint32 u32[4]; \ uint32 u32[4]; \
struct { \ struct {
#define REG32_(prefix, name) REG32(prefix##name) #define REG32_(prefix, name) REG32(prefix##name)
#define REG64_(prefix, name) REG64(prefix##name) #define REG64_(prefix, name) REG64(prefix##name)
@ -282,20 +282,20 @@ union name \
#define REG32_SET(name) \ #define REG32_SET(name) \
union name \ union name \
{ \ { \
uint32 u32; \ uint32 u32;
#define REG64_SET(name) \ #define REG64_SET(name) \
union name \ union name \
{ \ { \
uint64 u64; \ uint64 u64; \
uint32 u32[2]; \ uint32 u32[2];
#define REG128_SET(name) \ #define REG128_SET(name) \
union name \ union name \
{ \ { \
__m128i m128; \ __m128i m128; \
uint64 u64[2]; \ uint64 u64[2]; \
uint32 u32[4]; \ uint32 u32[4];
#define REG_SET_END }; #define REG_SET_END };
@ -840,8 +840,10 @@ REG_END2
{ {
if (TBW < 2) if (TBW < 2)
{ {
if(PSM == PSM_PSMT8) return TW > 7 || TH > 6; if (PSM == PSM_PSMT8)
if(PSM == PSM_PSMT4) return TW > 7 || TH > 7; return TW > 7 || TH > 6;
if (PSM == PSM_PSMT4)
return TW > 7 || TH > 7;
} }
// The recast of TBW seems useless but it avoid tons of warning from GCC... // The recast of TBW seems useless but it avoid tons of warning from GCC...
@ -1121,7 +1123,13 @@ struct alignas(32) GIFPath
uint32 type; uint32 type;
GSVector4i regs; GSVector4i regs;
enum {TYPE_UNKNOWN, TYPE_ADONLY, TYPE_STQRGBAXYZF2, TYPE_STQRGBAXYZ2}; enum
{
TYPE_UNKNOWN,
TYPE_ADONLY,
TYPE_STQRGBAXYZF2,
TYPE_STQRGBAXYZ2
};
__forceinline void SetTag(const void* mem) __forceinline void SetTag(const void* mem)
{ {
@ -1137,7 +1145,8 @@ struct alignas(32) GIFPath
nloop = a & 0x7fff; nloop = a & 0x7fff;
if(nloop == 0) return; if (nloop == 0)
return;
GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though
@ -1157,30 +1166,59 @@ struct alignas(32) GIFPath
{ {
switch (nreg) switch (nreg)
{ {
case 1: break; case 1:
case 2: break; break;
case 2:
break;
case 3: case 3:
if(regs.u32[0] == 0x00040102) type = TYPE_STQRGBAXYZF2; // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f) // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f)
if(regs.u32[0] == 0x00050102) type = TYPE_STQRGBAXYZ2; // GoW (has other crazy formats, like ...030503050103) if (regs.u32[0] == 0x00040102)
type = TYPE_STQRGBAXYZF2;
// GoW (has other crazy formats, like ...030503050103)
if (regs.u32[0] == 0x00050102)
type = TYPE_STQRGBAXYZ2;
// TODO: common types with UV instead // TODO: common types with UV instead
break; break;
case 4: break; case 4:
case 5: break; break;
case 6: break; case 5:
case 7: break; break;
case 8: break; case 6:
break;
case 7:
break;
case 8:
break;
case 9: case 9:
if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x00000004) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 3;} // ffx // ffx
if (regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x00000004)
{
type = TYPE_STQRGBAXYZF2;
nreg = 3;
nloop *= 3;
}
break;
case 10:
break;
case 11:
break; break;
case 10: break;
case 11: break;
case 12: case 12:
if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x04010204) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 4;} // dq8 (not many, mostly 040102) // dq8 (not many, mostly 040102)
if (regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x04010204)
{
type = TYPE_STQRGBAXYZF2;
nreg = 3;
nloop *= 4;
}
break;
case 13:
break;
case 14:
break;
case 15:
break;
case 16:
break; break;
case 13: break;
case 14: break;
case 15: break;
case 16: break;
default: default:
__assume(0); __assume(0);
} }
@ -1234,7 +1272,8 @@ struct GSPrivRegSet
uint64 _pad6; uint64 _pad6;
GSRegSYNCV SYNCV; GSRegSYNCV SYNCV;
uint64 _pad7; uint64 _pad7;
struct { struct
{
GSRegDISPFB DISPFB; GSRegDISPFB DISPFB;
uint64 _pad1; uint64 _pad1;
GSRegDISPLAY DISPLAY; GSRegDISPLAY DISPLAY;
@ -1276,10 +1315,13 @@ struct GSPrivRegSet
{ {
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
{ {
if (!fp) return; if (!fp)
return;
if(i == 0 && !PMODE.EN1) continue; if (i == 0 && !PMODE.EN1)
if(i == 1 && !PMODE.EN2) continue; continue;
if (i == 1 && !PMODE.EN2)
continue;
fprintf(fp, "DISPFB[%d] BP=%05x BW=%u PSM=%u DBX=%u DBY=%u\n", fprintf(fp, "DISPFB[%d] BP=%05x BW=%u PSM=%u DBX=%u DBY=%u\n",
i, i,
@ -1287,8 +1329,7 @@ struct GSPrivRegSet
DISP[i].DISPFB.FBW, DISP[i].DISPFB.FBW,
DISP[i].DISPFB.PSM, DISP[i].DISPFB.PSM,
DISP[i].DISPFB.DBX, DISP[i].DISPFB.DBX,
DISP[i].DISPFB.DBY DISP[i].DISPFB.DBY);
);
fprintf(fp, "DISPLAY[%d] DX=%u DY=%u DW=%u DH=%u MAGH=%u MAGV=%u\n", fprintf(fp, "DISPLAY[%d] DX=%u DY=%u DW=%u DH=%u MAGH=%u MAGV=%u\n",
i, i,
@ -1297,8 +1338,7 @@ struct GSPrivRegSet
DISP[i].DISPLAY.DW, DISP[i].DISPLAY.DW,
DISP[i].DISPLAY.DH, DISP[i].DISPLAY.DH,
DISP[i].DISPLAY.MAGH, DISP[i].DISPLAY.MAGH,
DISP[i].DISPLAY.MAGV DISP[i].DISPLAY.MAGV);
);
} }
fprintf(fp, "PMODE EN1=%u EN2=%u CRTMD=%u MMOD=%u AMOD=%u SLBG=%u ALP=%u\n", fprintf(fp, "PMODE EN1=%u EN2=%u CRTMD=%u MMOD=%u AMOD=%u SLBG=%u ALP=%u\n",
@ -1308,8 +1348,7 @@ struct GSPrivRegSet
PMODE.MMOD, PMODE.MMOD,
PMODE.AMOD, PMODE.AMOD,
PMODE.SLBG, PMODE.SLBG,
PMODE.ALP PMODE.ALP);
);
fprintf(fp, "SMODE1 CLKSEL=%u CMOD=%u EX=%u GCONT=%u LC=%u NVCK=%u PCK2=%u PEHS=%u PEVS=%u PHS=%u PRST=%u PVS=%u RC=%u SINT=%u SLCK=%u SLCK2=%u SPML=%u T1248=%u VCKSEL=%u VHP=%u XPCK=%u\n", fprintf(fp, "SMODE1 CLKSEL=%u CMOD=%u EX=%u GCONT=%u LC=%u NVCK=%u PCK2=%u PEHS=%u PEVS=%u PHS=%u PRST=%u PVS=%u RC=%u SINT=%u SLCK=%u SLCK2=%u SPML=%u T1248=%u VCKSEL=%u VHP=%u XPCK=%u\n",
SMODE1.CLKSEL, SMODE1.CLKSEL,
@ -1332,29 +1371,24 @@ struct GSPrivRegSet
SMODE1.T1248, SMODE1.T1248,
SMODE1.VCKSEL, SMODE1.VCKSEL,
SMODE1.VHP, SMODE1.VHP,
SMODE1.XPCK SMODE1.XPCK);
);
fprintf(fp, "SMODE2 INT=%u FFMD=%u DPMS=%u\n", fprintf(fp, "SMODE2 INT=%u FFMD=%u DPMS=%u\n",
SMODE2.INT, SMODE2.INT,
SMODE2.FFMD, SMODE2.FFMD,
SMODE2.DPMS SMODE2.DPMS);
);
fprintf(fp, "SRFSH %08x_%08x\n", fprintf(fp, "SRFSH %08x_%08x\n",
SRFSH.u32[0], SRFSH.u32[0],
SRFSH.u32[1] SRFSH.u32[1]);
);
fprintf(fp, "SYNCH1 %08x_%08x\n", fprintf(fp, "SYNCH1 %08x_%08x\n",
SYNCH1.u32[0], SYNCH1.u32[0],
SYNCH1.u32[1] SYNCH1.u32[1]);
);
fprintf(fp, "SYNCH2 %08x_%08x\n", fprintf(fp, "SYNCH2 %08x_%08x\n",
SYNCH2.u32[0], SYNCH2.u32[0],
SYNCH2.u32[1] SYNCH2.u32[1]);
);
fprintf(fp, "SYNCV VBP=%u VBPE=%u VDP=%u VFP=%u VFPE=%u VS=%u\n", fprintf(fp, "SYNCV VBP=%u VBPE=%u VDP=%u VFP=%u VFPE=%u VS=%u\n",
SYNCV.VBP, SYNCV.VBP,
@ -1362,28 +1396,23 @@ struct GSPrivRegSet
SYNCV.VDP, SYNCV.VDP,
SYNCV.VFP, SYNCV.VFP,
SYNCV.VFPE, SYNCV.VFPE,
SYNCV.VS SYNCV.VS);
);
fprintf(fp, "CSR %08x_%08x\n", fprintf(fp, "CSR %08x_%08x\n",
CSR.u32[0], CSR.u32[0],
CSR.u32[1] CSR.u32[1]);
);
fprintf(fp, "BGCOLOR B=%u G=%u R=%u\n", fprintf(fp, "BGCOLOR B=%u G=%u R=%u\n",
BGCOLOR.B, BGCOLOR.B,
BGCOLOR.G, BGCOLOR.G,
BGCOLOR.R BGCOLOR.R);
);
fprintf(fp, "EXTBUF BP=0x%x BW=%u FBIN=%u WFFMD=%u EMODA=%u EMODC=%u WDX=%u WDY=%u\n", fprintf(fp, "EXTBUF BP=0x%x BW=%u FBIN=%u WFFMD=%u EMODA=%u EMODC=%u WDX=%u WDY=%u\n",
EXTBUF.EXBP, EXTBUF.EXBW, EXTBUF.FBIN, EXTBUF.WFFMD, EXTBUF.EXBP, EXTBUF.EXBW, EXTBUF.FBIN, EXTBUF.WFFMD,
EXTBUF.EMODA, EXTBUF.EMODC, EXTBUF.WDX, EXTBUF.WDY EXTBUF.EMODA, EXTBUF.EMODC, EXTBUF.WDX, EXTBUF.WDY);
);
fprintf(fp, "EXTDATA SX=%u SY=%u SMPH=%u SMPV=%u WW=%u WH=%u\n", fprintf(fp, "EXTDATA SX=%u SY=%u SMPH=%u SMPV=%u WW=%u WH=%u\n",
EXTDATA.SX, EXTDATA.SY, EXTDATA.SMPH, EXTDATA.SMPV, EXTDATA.WW, EXTDATA.WH EXTDATA.SX, EXTDATA.SY, EXTDATA.SMPH, EXTDATA.SMPV, EXTDATA.WW, EXTDATA.WH);
);
fprintf(fp, "EXTWRITE EN=%u\n", EXTWRITE.WRITE); fprintf(fp, "EXTWRITE EN=%u\n", EXTWRITE.WRITE);
} }
@ -1391,7 +1420,8 @@ struct GSPrivRegSet
void Dump(const std::string& filename) void Dump(const std::string& filename)
{ {
FILE* fp = fopen(filename.c_str(), "wt"); FILE* fp = fopen(filename.c_str(), "wt");
if (fp) { if (fp)
{
Dump(fp); Dump(fp);
fclose(fp); fclose(fp);
} }
@ -1400,13 +1430,34 @@ struct GSPrivRegSet
#pragma pack(pop) #pragma pack(pop)
enum {KEYPRESS=1, KEYRELEASE=2}; enum
struct GSKeyEventData {uint32 key, type;}; {
KEYPRESS = 1,
KEYRELEASE = 2
};
struct GSKeyEventData
{
uint32 key, type;
};
enum {FREEZE_LOAD=0, FREEZE_SAVE=1, FREEZE_SIZE=2}; enum
struct GSFreezeData {int size; uint8* data;}; {
FREEZE_LOAD = 0,
FREEZE_SAVE = 1,
FREEZE_SIZE = 2
};
struct GSFreezeData
{
int size;
uint8* data;
};
enum stateType {ST_WRITE, ST_TRANSFER, ST_VSYNC}; enum stateType
{
ST_WRITE,
ST_TRANSFER,
ST_VSYNC
};
enum class GSVideoMode : uint8 enum class GSVideoMode : uint8
{ {

View File

@ -21,7 +21,8 @@
#pragma once #pragma once
template<int i> class GSAlignedClass template <int i>
class GSAlignedClass
{ {
public: public:
GSAlignedClass() {} GSAlignedClass() {}

View File

@ -53,7 +53,8 @@ class GSBlock
static const GSVector4i m_uw8hmask3; static const GSVector4i m_uw8hmask3;
public: public:
template<int i, int alignment, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int i, int alignment, uint32 mask>
__forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
const uint8* RESTRICT s0 = &src[srcpitch * 0]; const uint8* RESTRICT s0 = &src[srcpitch * 0];
const uint8* RESTRICT s1 = &src[srcpitch * 1]; const uint8* RESTRICT s1 = &src[srcpitch * 1];
@ -177,7 +178,8 @@ public:
#endif #endif
} }
template<int i, int alignment> __forceinline static void WriteColumn16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int i, int alignment>
__forceinline static void WriteColumn16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
const uint8* RESTRICT s0 = &src[srcpitch * 0]; const uint8* RESTRICT s0 = &src[srcpitch * 0];
const uint8* RESTRICT s1 = &src[srcpitch * 1]; const uint8* RESTRICT s1 = &src[srcpitch * 1];
@ -250,7 +252,8 @@ public:
#endif #endif
} }
template<int i, int alignment> __forceinline static void WriteColumn8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int i, int alignment>
__forceinline static void WriteColumn8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
// TODO: read unaligned as WriteColumn32 does and try saving a few shuffles // TODO: read unaligned as WriteColumn32 does and try saving a few shuffles
@ -312,7 +315,8 @@ public:
#endif #endif
} }
template<int i, int alignment> __forceinline static void WriteColumn4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int i, int alignment>
__forceinline static void WriteColumn4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
//printf("WriteColumn4\n"); //printf("WriteColumn4\n");
@ -347,7 +351,8 @@ public:
((GSVector4i*)dst)[i * 4 + 3] = v3; ((GSVector4i*)dst)[i * 4 + 3] = v3;
} }
template<int alignment, uint32 mask> static void WriteColumn32(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment, uint32 mask>
static void WriteColumn32(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
switch ((y >> 1) & 3) switch ((y >> 1) & 3)
{ {
@ -359,7 +364,8 @@ public:
} }
} }
template<int alignment> static void WriteColumn16(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment>
static void WriteColumn16(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
switch ((y >> 1) & 3) switch ((y >> 1) & 3)
{ {
@ -371,7 +377,8 @@ public:
} }
} }
template<int alignment> static void WriteColumn8(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment>
static void WriteColumn8(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
switch ((y >> 2) & 3) switch ((y >> 2) & 3)
{ {
@ -383,7 +390,8 @@ public:
} }
} }
template<int alignment> static void WriteColumn4(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment>
static void WriteColumn4(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
switch ((y >> 2) & 3) switch ((y >> 2) & 3)
{ {
@ -395,7 +403,8 @@ public:
} }
} }
template<int alignment, uint32 mask> static void WriteBlock32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment, uint32 mask>
static void WriteBlock32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
WriteColumn32<0, alignment, mask>(dst, src, srcpitch); WriteColumn32<0, alignment, mask>(dst, src, srcpitch);
src += srcpitch * 2; src += srcpitch * 2;
@ -406,7 +415,8 @@ public:
WriteColumn32<3, alignment, mask>(dst, src, srcpitch); WriteColumn32<3, alignment, mask>(dst, src, srcpitch);
} }
template<int alignment> static void WriteBlock16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment>
static void WriteBlock16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
WriteColumn16<0, alignment>(dst, src, srcpitch); WriteColumn16<0, alignment>(dst, src, srcpitch);
src += srcpitch * 2; src += srcpitch * 2;
@ -417,7 +427,8 @@ public:
WriteColumn16<3, alignment>(dst, src, srcpitch); WriteColumn16<3, alignment>(dst, src, srcpitch);
} }
template<int alignment> static void WriteBlock8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment>
static void WriteBlock8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
WriteColumn8<0, alignment>(dst, src, srcpitch); WriteColumn8<0, alignment>(dst, src, srcpitch);
src += srcpitch * 4; src += srcpitch * 4;
@ -428,7 +439,8 @@ public:
WriteColumn8<3, alignment>(dst, src, srcpitch); WriteColumn8<3, alignment>(dst, src, srcpitch);
} }
template<int alignment> static void WriteBlock4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template <int alignment>
static void WriteBlock4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
WriteColumn4<0, alignment>(dst, src, srcpitch); WriteColumn4<0, alignment>(dst, src, srcpitch);
src += srcpitch * 4; src += srcpitch * 4;
@ -439,7 +451,8 @@ public:
WriteColumn4<3, alignment>(dst, src, srcpitch); WriteColumn4<3, alignment>(dst, src, srcpitch);
} }
template<int i> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) template <int i>
__forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{ {
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
@ -476,7 +489,8 @@ public:
#endif #endif
} }
template<int i> __forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) template <int i>
__forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{ {
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
@ -517,7 +531,8 @@ public:
#endif #endif
} }
template<int i> __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) template <int i>
__forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{ {
//for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j; //for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j;
@ -583,7 +598,8 @@ public:
#endif #endif
} }
template<int i> __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) template <int i>
__forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{ {
//printf("ReadColumn4\n"); //printf("ReadColumn4\n");
@ -990,17 +1006,20 @@ public:
#endif #endif
} }
template<bool AEM, class V> __forceinline static V Expand24to32(const V& c, const V& TA0) template <bool AEM, class V>
__forceinline static V Expand24to32(const V& c, const V& TA0)
{ {
return c | (AEM ? TA0.andnot(c == V::zero()) : TA0); // TA0 & (c != GSVector4i::zero()) return c | (AEM ? TA0.andnot(c == V::zero()) : TA0); // TA0 & (c != GSVector4i::zero())
} }
template<bool AEM, class V> __forceinline static V Expand16to32(const V& c, const V& TA0, const V& TA1) template <bool AEM, class V>
__forceinline static V Expand16to32(const V& c, const V& TA0, const V& TA1)
{ {
return ((c & m_rxxx) << 3) | ((c & m_xgxx) << 6) | ((c & m_xxbx) << 9) | (AEM ? TA0.blend8(TA1, c.sra16(15)).andnot(c == V::zero()) : TA0.blend(TA1, c.sra16(15))); return ((c & m_rxxx) << 3) | ((c & m_xgxx) << 6) | ((c & m_xxbx) << 9) | (AEM ? TA0.blend8(TA1, c.sra16(15)).andnot(c == V::zero()) : TA0.blend(TA1, c.sra16(15)));
} }
template<bool AEM> static void ExpandBlock24(const uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) template <bool AEM>
static void ExpandBlock24(const uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
@ -1047,7 +1066,8 @@ public:
#endif #endif
} }
template<bool AEM> static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs template <bool AEM>
static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
{ {
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
@ -1361,7 +1381,8 @@ public:
{ {
uint8* s = (uint8*)src; uint8* s = (uint8*)src;
for (int j = 0; j < 8; j++, s += srcpitch) for (int j = 0; j < 8; j++, s += srcpitch)
for(int i = 0; i < 4; i++) s[i] = (columnTable32[j][i*2] & 0x0f) | (columnTable32[j][i*2+1] << 4); for (int i = 0; i < 4; i++)
s[i] = (columnTable32[j][i * 2] & 0x0f) | (columnTable32[j][i * 2 + 1] << 4);
} }
GSVector4i v4, v5, v6; GSVector4i v4, v5, v6;
@ -1536,7 +1557,8 @@ public:
#endif #endif
} }
template<bool AEM> __forceinline static void ReadAndExpandBlock24(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) template <bool AEM>
__forceinline static void ReadAndExpandBlock24(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
@ -1612,7 +1634,8 @@ public:
#endif #endif
} }
template<bool AEM> __forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) template <bool AEM>
__forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
{ {
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501

View File

@ -26,33 +26,45 @@
#ifdef _WIN32 #ifdef _WIN32
class CPinInfo : public PIN_INFO { class CPinInfo : public PIN_INFO
{
public: public:
CPinInfo() { pFilter = NULL; } CPinInfo() { pFilter = NULL; }
~CPinInfo() { if (pFilter) pFilter->Release(); } ~CPinInfo()
{
if (pFilter)
pFilter->Release();
}
}; };
class CFilterInfo : public FILTER_INFO { class CFilterInfo : public FILTER_INFO
{
public: public:
CFilterInfo() { pGraph = NULL; } CFilterInfo() { pGraph = NULL; }
~CFilterInfo() { if (pGraph) pGraph->Release(); } ~CFilterInfo()
{
if (pGraph)
pGraph->Release();
}
}; };
#define BeginEnumFilters(pFilterGraph, pEnumFilters, pBaseFilter) \ #define BeginEnumFilters(pFilterGraph, pEnumFilters, pBaseFilter) \
{CComPtr<IEnumFilters> pEnumFilters; \ { \
CComPtr<IEnumFilters> pEnumFilters; \
if(pFilterGraph && SUCCEEDED(pFilterGraph->EnumFilters(&pEnumFilters))) \ if(pFilterGraph && SUCCEEDED(pFilterGraph->EnumFilters(&pEnumFilters))) \
{ \ { \
for(CComPtr<IBaseFilter> pBaseFilter; S_OK == pEnumFilters->Next(1, &pBaseFilter, 0); pBaseFilter = NULL) \ for(CComPtr<IBaseFilter> pBaseFilter; S_OK == pEnumFilters->Next(1, &pBaseFilter, 0); pBaseFilter = NULL) \
{ \ {
#define EndEnumFilters }}} #define EndEnumFilters }}}
#define BeginEnumPins(pBaseFilter, pEnumPins, pPin) \ #define BeginEnumPins(pBaseFilter, pEnumPins, pPin) \
{CComPtr<IEnumPins> pEnumPins; \ { \
CComPtr<IEnumPins> pEnumPins; \
if(pBaseFilter && SUCCEEDED(pBaseFilter->EnumPins(&pEnumPins))) \ if(pBaseFilter && SUCCEEDED(pBaseFilter->EnumPins(&pEnumPins))) \
{ \ { \
for(CComPtr<IPin> pPin; S_OK == pEnumPins->Next(1, &pPin, 0); pPin = NULL) \ for(CComPtr<IPin> pPin; S_OK == pEnumPins->Next(1, &pPin, 0); pPin = NULL) \
{ \ {
#define EndEnumPins }}} #define EndEnumPins }}}
@ -76,9 +88,9 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
STDMETHODIMP NonDelegatingQueryInterface(REFIID riid, void** ppv) STDMETHODIMP NonDelegatingQueryInterface(REFIID riid, void** ppv)
{ {
return return riid == __uuidof(IGSSource)
riid == __uuidof(IGSSource) ? GetInterface((IGSSource*)this, ppv) : ? GetInterface((IGSSource*)this, ppv)
__super::NonDelegatingQueryInterface(riid, ppv); : __super::NonDelegatingQueryInterface(riid, ppv);
} }
class GSSourceOutputPin : public CBaseOutputPin class GSSourceOutputPin : public CBaseOutputPin
@ -126,8 +138,10 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 4; vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 4;
mt.SetFormat((uint8*)&vih, sizeof(vih)); mt.SetFormat((uint8*)&vih, sizeof(vih));
if(colorspace == 1) m_mts.insert(m_mts.begin(), mt); if (colorspace == 1)
else m_mts.push_back(mt); m_mts.insert(m_mts.begin(), mt);
else
m_mts.push_back(mt);
} }
HRESULT GSSourceOutputPin::DecideBufferSize(IMemAllocator* pAlloc, ALLOCATOR_PROPERTIES* pProperties) HRESULT GSSourceOutputPin::DecideBufferSize(IMemAllocator* pAlloc, ALLOCATOR_PROPERTIES* pProperties)
@ -173,8 +187,10 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
{ {
CheckPointer(pmt, E_POINTER); CheckPointer(pmt, E_POINTER);
if(i < 0) return E_INVALIDARG; if (i < 0)
if(i > 1) return VFW_S_NO_MORE_ITEMS; return E_INVALIDARG;
if (i > 1)
return VFW_S_NO_MORE_ITEMS;
*pmt = m_mts[i]; *pmt = m_mts[i];
@ -195,7 +211,6 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
GSSourceOutputPin* m_output; GSSourceOutputPin* m_output;
public: public:
GSSource(int w, int h, float fps, IUnknown* pUnk, HRESULT& hr, int colorspace) GSSource(int w, int h, float fps, IUnknown* pUnk, HRESULT& hr, int colorspace)
: CBaseFilter("GSSource", pUnk, this, __uuidof(this), &hr) : CBaseFilter("GSSource", pUnk, this, __uuidof(this), &hr)
, m_output(NULL) , m_output(NULL)
@ -364,7 +379,8 @@ public:
static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir) static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir)
{ {
if(!pBF) return(NULL); if (!pBF)
return nullptr;
BeginEnumPins(pBF, pEP, pPin) BeginEnumPins(pBF, pEP, pPin)
{ {
@ -374,12 +390,12 @@ static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir)
{ {
IPin* pRet = pPin.Detach(); IPin* pRet = pPin.Detach();
pRet->Release(); pRet->Release();
return(pRet); return pRet;
} }
} }
EndEnumPins EndEnumPins
return(NULL); return nullptr;
} }
#endif #endif
@ -470,8 +486,7 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float
} }
else else
{ {
if(FAILED(hr = m_graph->AddFilter(m_src, L"Source")) if (FAILED(hr = m_graph->AddFilter(m_src, L"Source")) || FAILED(hr = m_graph->AddFilter(dlg.m_enc, L"Encoder")))
|| FAILED(hr = m_graph->AddFilter(dlg.m_enc, L"Encoder")))
{ {
return false; return false;
} }
@ -519,7 +534,8 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float
m_size.x = theApp.GetConfigI("CaptureWidth"); m_size.x = theApp.GetConfigI("CaptureWidth");
m_size.y = theApp.GetConfigI("CaptureHeight"); m_size.y = theApp.GetConfigI("CaptureHeight");
for(int i = 0; i < m_threads; i++) { for (int i = 0; i < m_threads; i++)
{
m_workers.push_back(std::unique_ptr<GSPng::Worker>(new GSPng::Worker(&GSPng::Process))); m_workers.push_back(std::unique_ptr<GSPng::Worker>(new GSPng::Worker(&GSPng::Process)));
} }

View File

@ -71,9 +71,12 @@ class alignas(32) GSClut : public GSAlignedClass<32>
void WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template<int n> void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); template <int n>
template<int n> void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template<int n> void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); template <int n>
void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template <int n>
void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);

View File

@ -544,10 +544,11 @@ CRC::Game CRC::Lookup(uint32 crc)
int crcDups = 0; int crcDups = 0;
for (size_t i = 0; i < countof(m_games); i++) for (size_t i = 0; i < countof(m_games); i++)
{ {
if( !IsCrcExcluded( exclusions, m_games[i].crc ) ){ if (!IsCrcExcluded(exclusions, m_games[i].crc))
if(m_map[m_games[i].crc]){ {
printf("[FIXME] GSdx: Duplicate CRC: 0x%08X: (game-id/region-id) %d/%d overrides %d/%d\n" if (m_map[m_games[i].crc])
, m_games[i].crc, m_games[i].title, m_games[i].region, m_map[m_games[i].crc]->title, m_map[m_games[i].crc]->region); {
printf("[FIXME] GSdx: Duplicate CRC: 0x%08X: (game-id/region-id) %d/%d overrides %d/%d\n", m_games[i].crc, m_games[i].title, m_games[i].region, m_map[m_games[i].crc]->title, m_map[m_games[i].crc]->region);
crcDups++; crcDups++;
} }

View File

@ -31,22 +31,29 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv)
if (wm == CLAMP_CLAMP) if (wm == CLAMP_CLAMP)
{ {
if(uv > limit) uv = limit; if (uv > limit)
uv = limit;
} }
else if (wm == CLAMP_REPEAT) else if (wm == CLAMP_REPEAT)
{ {
if(tl < 0) uv = limit; // wrap around if (tl < 0)
else if(uv > limit) uv = limit; uv = limit; // wrap around
else if (uv > limit)
uv = limit;
} }
else if (wm == CLAMP_REGION_CLAMP) else if (wm == CLAMP_REGION_CLAMP)
{ {
if(uv < minuv) uv = minuv; if (uv < minuv)
if(uv > maxuv) uv = maxuv; uv = minuv;
if (uv > maxuv)
uv = maxuv;
} }
else if (wm == CLAMP_REGION_REPEAT) else if (wm == CLAMP_REGION_REPEAT)
{ {
if(tl < 0) uv = minuv | maxuv; // wrap around, just use (any & mask) | fix if (tl < 0)
else uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask) uv = minuv | maxuv; // wrap around, just use (any & mask) | fix
else
uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask)
} }
return uv; return uv;
@ -74,7 +81,8 @@ static int extend(int uv, int size)
GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap) GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap)
{ {
if(mipmap) return TEX0; // no mipmaping allowed if (mipmap)
return TEX0; // no mipmaping allowed
// find the optimal value for TW/TH by analyzing vertex trace and clamping values, extending only for region modes where uv may be outside // find the optimal value for TW/TH by analyzing vertex trace and clamping values, extending only for region modes where uv may be outside

View File

@ -197,7 +197,8 @@ public:
{ {
// Append on purpose so env + context are merged into a single file // Append on purpose so env + context are merged into a single file
FILE* fp = fopen(filename.c_str(), "at"); FILE* fp = fopen(filename.c_str(), "at");
if (!fp) return; if (!fp)
return;
fprintf(fp, "XYOFFSET\n" fprintf(fp, "XYOFFSET\n"
"\tX:%u\n" "\tX:%u\n"

View File

@ -88,7 +88,8 @@ public:
void Dump(const std::string& filename) void Dump(const std::string& filename)
{ {
FILE* fp = fopen(filename.c_str(), "wt"); FILE* fp = fopen(filename.c_str(), "wt");
if (!fp) return; if (!fp)
return;
fprintf(fp, "PRIM\n" fprintf(fp, "PRIM\n"
"\tPRIM:%u\n" "\tPRIM:%u\n"
@ -182,8 +183,8 @@ public:
, BITBLTBUF.SBW, BITBLTBUF.SBP, BITBLTBUF.SPSM, BITBLTBUF.DBW, BITBLTBUF.DPSM, BITBLTBUF.DBP); , BITBLTBUF.SBW, BITBLTBUF.SBP, BITBLTBUF.SPSM, BITBLTBUF.DBW, BITBLTBUF.DPSM, BITBLTBUF.DBP);
fprintf(fp, "TRXDIR\n" fprintf(fp, "TRXDIR\n"
"\tXDIR:%u\n\n" "\tXDIR:%u\n\n",
, TRXDIR.XDIR); TRXDIR.XDIR);
fprintf(fp, "TRXPOS\n" fprintf(fp, "TRXPOS\n"
"\tDIRY:%u\n" "\tDIRY:%u\n"
@ -201,5 +202,4 @@ public:
fclose(fp); fclose(fp);
} }
}; };

View File

@ -122,7 +122,8 @@ GSDumpXz::GSDumpXz(const std::string& fn, uint32 crc, const GSFreezeData& fd, co
{ {
m_strm = LZMA_STREAM_INIT; m_strm = LZMA_STREAM_INIT;
lzma_ret ret = lzma_easy_encoder(&m_strm, 6 /*level*/, LZMA_CHECK_CRC64); lzma_ret ret = lzma_easy_encoder(&m_strm, 6 /*level*/, LZMA_CHECK_CRC64);
if (ret != LZMA_OK) { if (ret != LZMA_OK)
{
fprintf(stderr, "GSDumpXz: Error initializing LZMA encoder ! (error code %u)\n", ret); fprintf(stderr, "GSDumpXz: Error initializing LZMA encoder ! (error code %u)\n", ret);
return; return;
} }
@ -176,13 +177,15 @@ void GSDumpXz::Flush()
void GSDumpXz::Compress(lzma_action action, lzma_ret expected_status) void GSDumpXz::Compress(lzma_action action, lzma_ret expected_status)
{ {
std::vector<uint8> out_buff(1024 * 1024); std::vector<uint8> out_buff(1024 * 1024);
do { do
{
m_strm.next_out = out_buff.data(); m_strm.next_out = out_buff.data();
m_strm.avail_out = out_buff.size(); m_strm.avail_out = out_buff.size();
lzma_ret ret = lzma_code(&m_strm, action); lzma_ret ret = lzma_code(&m_strm, action);
if (ret != expected_status) { if (ret != expected_status)
{
fprintf(stderr, "GSDumpXz: Error %d\n", (int)ret); fprintf(stderr, "GSDumpXz: Error %d\n", (int)ret);
return; return;
} }

View File

@ -85,7 +85,8 @@ GSLocalMemory::GSLocalMemory()
: m_clut(this) : m_clut(this)
{ {
m_use_fifo_alloc = theApp.GetConfigB("UserHacks") && theApp.GetConfigB("wrap_gs_mem"); m_use_fifo_alloc = theApp.GetConfigB("UserHacks") && theApp.GetConfigB("wrap_gs_mem");
switch (theApp.GetCurrentRendererType()) { switch (theApp.GetCurrentRendererType())
{
case GSRendererType::OGL_SW: case GSRendererType::OGL_SW:
m_use_fifo_alloc = true; m_use_fifo_alloc = true;
break; break;
@ -240,7 +241,8 @@ GSLocalMemory::GSLocalMemory()
m_psm[i].pal = 0; m_psm[i].pal = 0;
m_psm[i].bs = GSVector2i(8, 8); m_psm[i].bs = GSVector2i(8, 8);
m_psm[i].pgs = GSVector2i(64, 32); m_psm[i].pgs = GSVector2i(64, 32);
for(int j = 0; j < 8; j++) m_psm[i].rowOffset[j] = rowOffset32; for (int j = 0; j < 8; j++)
m_psm[i].rowOffset[j] = rowOffset32;
m_psm[i].blockOffset = blockOffset32; m_psm[i].blockOffset = blockOffset32;
m_psm[i].msk = 0xff; m_psm[i].msk = 0xff;
m_psm[i].depth = 0; m_psm[i].depth = 0;
@ -435,7 +437,8 @@ GSLocalMemory::GSLocalMemory()
m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256; m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256;
m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16; m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16;
for(size_t i = 0; i < countof(m_psm); i++) m_psm[i].fmt = 3; for (size_t i = 0; i < countof(m_psm); i++)
m_psm[i].fmt = 3;
m_psm[PSM_PSMCT32].fmt = m_psm[PSM_PSMZ32].fmt = 0; m_psm[PSM_PSMCT32].fmt = m_psm[PSM_PSMZ32].fmt = 0;
m_psm[PSM_PSMCT24].fmt = m_psm[PSM_PSMZ24].fmt = 1; m_psm[PSM_PSMCT24].fmt = m_psm[PSM_PSMZ24].fmt = 1;
m_psm[PSM_PSMCT16].fmt = m_psm[PSM_PSMZ16].fmt = 2; m_psm[PSM_PSMCT16].fmt = m_psm[PSM_PSMZ16].fmt = 2;
@ -493,9 +496,12 @@ GSLocalMemory::~GSLocalMemory()
else else
vmfree(m_vm8, m_vmsize * 4); vmfree(m_vm8, m_vmsize * 4);
for(auto &i : m_omap) delete i.second; for (auto& i : m_omap)
for(auto &i : m_pomap) _aligned_free(i.second); delete i.second;
for(auto &i : m_po4map) _aligned_free(i.second); for (auto& i : m_pomap)
_aligned_free(i.second);
for (auto& i : m_po4map)
_aligned_free(i.second);
for (auto& i : m_p2tmap) for (auto& i : m_p2tmap)
{ {
@ -853,12 +859,14 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
break; break;
case PSM_PSMT8: case PSM_PSMT8:
GSBlock::ReadColumn8(y, dst, buff, 16); GSBlock::ReadColumn8(y, dst, buff, 16);
for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + x], 16); for (int i = 0, j = y2; i < h2; i++, j++)
memcpy(&buff[j * 16], &src[i * srcpitch + x], 16);
GSBlock::WriteColumn8<32>(y, dst, buff, 16); GSBlock::WriteColumn8<32>(y, dst, buff, 16);
break; break;
case PSM_PSMT4: case PSM_PSMT4:
GSBlock::ReadColumn4(y, dst, buff, 16); GSBlock::ReadColumn4(y, dst, buff, 16);
for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + (x >> 1)], 16); for (int i = 0, j = y2; i < h2; i++, j++)
memcpy(&buff[j * 16], &src[i * srcpitch + (x >> 1)], 16);
GSBlock::WriteColumn4<32>(y, dst, buff, 16); GSBlock::WriteColumn4<32>(y, dst, buff, 16);
break; break;
// TODO // TODO
@ -940,12 +948,14 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
break; break;
case PSM_PSMT8: case PSM_PSMT8:
GSBlock::ReadColumn8(y, dst, buff, 16); GSBlock::ReadColumn8(y, dst, buff, 16);
for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + x], 16); for (int i = 0; i < h; i++)
memcpy(&buff[i * 16], &src[i * srcpitch + x], 16);
GSBlock::WriteColumn8<32>(y, dst, buff, 16); GSBlock::WriteColumn8<32>(y, dst, buff, 16);
break; break;
case PSM_PSMT4: case PSM_PSMT4:
GSBlock::ReadColumn4(y, dst, buff, 16); GSBlock::ReadColumn4(y, dst, buff, 16);
for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + (x >> 1)], 16); for (int i = 0; i < h; i++)
memcpy(&buff[i * 16], &src[i * srcpitch + (x >> 1)], 16);
GSBlock::WriteColumn4<32>(y, dst, buff, 16); GSBlock::WriteColumn4<32>(y, dst, buff, 16);
break; break;
// TODO // TODO
@ -959,7 +969,8 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
template <int psm, int bsx, int bsy, int trbpp> template <int psm, int bsx, int bsy, int trbpp>
void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{ {
if(TRXREG.RRW == 0) return; if (TRXREG.RRW == 0)
return;
int l = (int)TRXPOS.DSAX; int l = (int)TRXPOS.DSAX;
int r = l + (int)TRXREG.RRW; int r = l + (int)TRXREG.RRW;
@ -1076,7 +1087,8 @@ static bool IsTopLeftAligned(int dsax, int tx, int ty, int bw, int bh)
void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{ {
if(TRXREG.RRW == 0) return; if (TRXREG.RRW == 0)
return;
uint32 bp = BITBLTBUF.DBP; uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW; uint32 bw = BITBLTBUF.DBW;
@ -1110,7 +1122,8 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GI
void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{ {
if(TRXREG.RRW == 0) return; if (TRXREG.RRW == 0)
return;
uint32 bp = BITBLTBUF.DBP; uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW; uint32 bw = BITBLTBUF.DBW;
@ -1144,7 +1157,8 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GI
void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{ {
if(TRXREG.RRW == 0) return; if (TRXREG.RRW == 0)
return;
uint32 bp = BITBLTBUF.DBP; uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW; uint32 bw = BITBLTBUF.DBW;
@ -1178,7 +1192,8 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, G
void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{ {
if(TRXREG.RRW == 0) return; if (TRXREG.RRW == 0)
return;
uint32 bp = BITBLTBUF.DBP; uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW; uint32 bw = BITBLTBUF.DBW;
@ -1212,7 +1227,8 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, G
void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{ {
if(TRXREG.RRW == 0) return; if (TRXREG.RRW == 0)
return;
uint32 bp = BITBLTBUF.DBP; uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW; uint32 bw = BITBLTBUF.DBW;
@ -1246,7 +1262,8 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, G
void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
{ {
if(len <= 0) return; if (len <= 0)
return;
const uint8* pb = (uint8*)src; const uint8* pb = (uint8*)src;
const uint16* pw = (uint16*)src; const uint16* pw = (uint16*)src;
@ -1278,7 +1295,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel32(addr + offset[x], *pd); WritePixel32(addr + offset[x], *pd);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1298,7 +1319,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel24(addr + offset[x], *(uint32*)pb); WritePixel24(addr + offset[x], *(uint32*)pb);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1320,7 +1345,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel16(addr + offset[x], *pw); WritePixel16(addr + offset[x], *pw);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1337,7 +1366,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel8(addr + offset[x], *pb); WritePixel8(addr + offset[x], *pb);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1355,7 +1388,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel4(addr + offset[x + 1], *pb >> 4); WritePixel4(addr + offset[x + 1], *pb >> 4);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1372,7 +1409,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel8H(addr + offset[x], *pb); WritePixel8H(addr + offset[x], *pb);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1390,7 +1431,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel4HL(addr + offset[x + 1], *pb >> 4); WritePixel4HL(addr + offset[x + 1], *pb >> 4);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1408,7 +1453,11 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
WritePixel4HH(addr + offset[x + 1], *pb >> 4); WritePixel4HH(addr + offset[x + 1], *pb >> 4);
} }
if(x >= ex) {x = sx; y++;} if (x >= ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1422,7 +1471,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const
{ {
if(len <= 0) return; if (len <= 0)
return;
uint8* RESTRICT pb = (uint8*)dst; uint8* RESTRICT pb = (uint8*)dst;
uint16* RESTRICT pw = (uint16*)dst; uint16* RESTRICT pw = (uint16*)dst;
@ -1467,7 +1517,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
GSVector4i::store<false>(&pd[0], GSVector4i::load(&ps[off + 0], &ps[off + 4])); GSVector4i::store<false>(&pd[0], GSVector4i::load(&ps[off + 0], &ps[off + 4]));
GSVector4i::store<false>(&pd[4], GSVector4i::load(&ps[off + 8], &ps[off + 12])); GSVector4i::store<false>(&pd[4], GSVector4i::load(&ps[off + 8], &ps[off + 12]));
for(int i = 0; i < 8; i++) ASSERT(pd[i] == ps[offset[x + i]]); for (int i = 0; i < 8; i++)
ASSERT(pd[i] == ps[offset[x + i]]);
} }
for (; len > 0 && x < ex; len--, x++, pd++) for (; len > 0 && x < ex; len--, x++, pd++)
@ -1475,7 +1526,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
*pd = ps[offset[x]]; *pd = ps[offset[x]];
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1499,7 +1554,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
pb[2] = (uint8)(c >> 16); pb[2] = (uint8)(c >> 16);
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1529,7 +1588,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
*pw = ps[offset[x]]; *pw = ps[offset[x]];
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1554,7 +1617,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
*pb = ps[offset[x]]; *pb = ps[offset[x]];
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1571,7 +1638,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
*pb = (uint8)(ReadPixel4(addr + offset[x + 0]) | (ReadPixel4(addr + offset[x + 1]) << 4)); *pb = (uint8)(ReadPixel4(addr + offset[x + 0]) | (ReadPixel4(addr + offset[x + 1]) << 4));
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1596,7 +1667,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
*pb = (uint8)(ps[offset[x]] >> 24); *pb = (uint8)(ps[offset[x]] >> 24);
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1616,7 +1691,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
*pb = (uint8)(c0 | c1); *pb = (uint8)(c0 | c1);
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1636,7 +1715,11 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
*pb = (uint8)(c0 | c1); *pb = (uint8)(c0 | c1);
} }
if(x == ex) {x = sx; y++;} if (x == ex)
{
x = sx;
y++;
}
} }
break; break;
@ -1687,10 +1770,12 @@ void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVecto
// Convert packed RGB scanline to 32 bits RGBA // Convert packed RGB scanline to 32 bits RGBA
ASSERT(dstpitch >= r.width() * 4); ASSERT(dstpitch >= r.width() * 4);
for(int y = r.top; y < r.bottom; y ++) { for (int y = r.top; y < r.bottom; y++)
{
uint8* line = dst + y * dstpitch; uint8* line = dst + y * dstpitch;
for(int x = r.right; x >= r.left; x--) { for (int x = r.right; x >= r.left; x--)
{
*(uint32*)&line[x * 4] = *(uint32*)&line[x * 3] & 0xFFFFFF; *(uint32*)&line[x * 4] = *(uint32*)&line[x * 3] & 0xFFFFFF;
} }
} }
@ -1852,9 +1937,7 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
readTexel rt = psm.rt; readTexel rt = psm.rt;
readTexture rtx = psm.rtx; readTexture rtx = psm.rtx;
if(r.width() < psm.bs.x || r.height() < psm.bs.y if (r.width() < psm.bs.x || r.height() < psm.bs.y || (r.left & (psm.bs.x - 1)) || (r.top & (psm.bs.y - 1)) || (r.right & (psm.bs.x - 1)) || (r.bottom & (psm.bs.y - 1)))
|| (r.left & (psm.bs.x - 1)) || (r.top & (psm.bs.y - 1))
|| (r.right & (psm.bs.x - 1)) || (r.bottom & (psm.bs.y - 1)))
{ {
GIFRegTEX0 TEX0; GIFRegTEX0 TEX0;
@ -1870,7 +1953,8 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
{ {
// TODO: expand r to block size, read into temp buffer // TODO: expand r to block size, read into temp buffer
if(!aligned) printf("unaligned memory pointer passed to ReadTexture\n"); if (!aligned)
printf("unaligned memory pointer passed to ReadTexture\n");
for (int y = r.top; y < r.bottom; y++, dst += dstpitch) for (int y = r.top; y < r.bottom; y++, dst += dstpitch)
{ {
@ -2085,7 +2169,8 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bb
GSVector4i r = rect.ralign<Align_Outside>(bs); GSVector4i r = rect.ralign<Align_Outside>(bs);
if(bbox != NULL) *bbox = r; if (bbox != NULL)
*bbox = r;
// worst case: // worst case:
// bp page-aligned: (w * h) / (64 * 32) // bp page-aligned: (w * h) / (64 * 32)

View File

@ -164,7 +164,10 @@ protected:
__forceinline static uint32 Expand16To32(uint16 c, const GIFRegTEXA& TEXA) __forceinline static uint32 Expand16To32(uint16 c, const GIFRegTEXA& TEXA)
{ {
return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3); return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24)
| ((c & 0x7c00) << 9)
| ((c & 0x03e0) << 6)
| ((c & 0x001f) << 3);
} }
// TODO // TODO
@ -557,7 +560,8 @@ public:
__forceinline void WritePixel4(uint32 addr, uint32 c) __forceinline void WritePixel4(uint32 addr, uint32 c)
{ {
int shift = (addr & 1) << 2; addr >>= 1; int shift = (addr & 1) << 2;
addr >>= 1;
m_vm8[addr] = (uint8)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift)); m_vm8[addr] = (uint8)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift));
} }
@ -913,10 +917,10 @@ public:
// //
template<typename T> void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); template <typename T>
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
// //
void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h); void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h);
}; };

View File

@ -21,32 +21,36 @@
#include "stdafx.h" #include "stdafx.h"
#include "GSLzma.h" #include "GSLzma.h"
GSDumpFile::GSDumpFile(char* filename, const char* repack_filename) { GSDumpFile::GSDumpFile(char* filename, const char* repack_filename)
{
m_fp = fopen(filename, "rb"); m_fp = fopen(filename, "rb");
if (m_fp == nullptr) { if (m_fp == nullptr)
{
fprintf(stderr, "failed to open %s\n", filename); fprintf(stderr, "failed to open %s\n", filename);
throw "BAD"; // Just exit the program throw "BAD"; // Just exit the program
} }
m_repack_fp = nullptr; m_repack_fp = nullptr;
if (repack_filename) { if (repack_filename)
{
m_repack_fp = fopen(repack_filename, "wb"); m_repack_fp = fopen(repack_filename, "wb");
if (m_repack_fp == nullptr) if (m_repack_fp == nullptr)
fprintf(stderr, "failed to open %s for repack\n", repack_filename); fprintf(stderr, "failed to open %s for repack\n", repack_filename);
} }
} }
void GSDumpFile::Repack(void* ptr, size_t size) { void GSDumpFile::Repack(void* ptr, size_t size)
{
if (m_repack_fp == nullptr) if (m_repack_fp == nullptr)
return; return;
size_t ret = fwrite(ptr, 1, size, m_repack_fp); size_t ret = fwrite(ptr, 1, size, m_repack_fp);
if (ret != size) if (ret != size)
fprintf(stderr, "Failed to repack\n"); fprintf(stderr, "Failed to repack\n");
} }
GSDumpFile::~GSDumpFile() { GSDumpFile::~GSDumpFile()
{
if (m_fp) if (m_fp)
fclose(m_fp); fclose(m_fp);
if (m_repack_fp) if (m_repack_fp)
@ -54,13 +58,16 @@ GSDumpFile::~GSDumpFile() {
} }
/******************************************************************/ /******************************************************************/
GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename) : GSDumpFile(filename, repack_filename) { GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename)
: GSDumpFile(filename, repack_filename)
{
memset(&m_strm, 0, sizeof(lzma_stream)); memset(&m_strm, 0, sizeof(lzma_stream));
lzma_ret ret = lzma_stream_decoder(&m_strm, UINT32_MAX, 0); lzma_ret ret = lzma_stream_decoder(&m_strm, UINT32_MAX, 0);
if (ret != LZMA_OK) { if (ret != LZMA_OK)
{
fprintf(stderr, "Error initializing the decoder! (error code %u)\n", ret); fprintf(stderr, "Error initializing the decoder! (error code %u)\n", ret);
throw "BAD"; // Just exit the program throw "BAD"; // Just exit the program
} }
@ -78,18 +85,21 @@ GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename) : GSDumpFile
m_strm.next_out = m_area; m_strm.next_out = m_area;
} }
void GSDumpLzma::Decompress() { void GSDumpLzma::Decompress()
{
lzma_action action = LZMA_RUN; lzma_action action = LZMA_RUN;
m_strm.next_out = m_area; m_strm.next_out = m_area;
m_strm.avail_out = m_buff_size; m_strm.avail_out = m_buff_size;
// Nothing left in the input buffer. Read data from the file // Nothing left in the input buffer. Read data from the file
if (m_strm.avail_in == 0 && !feof(m_fp)) { if (m_strm.avail_in == 0 && !feof(m_fp))
{
m_strm.next_in = m_inbuf; m_strm.next_in = m_inbuf;
m_strm.avail_in = fread(m_inbuf, 1, BUFSIZ, m_fp); m_strm.avail_in = fread(m_inbuf, 1, BUFSIZ, m_fp);
if (ferror(m_fp)) { if (ferror(m_fp))
{
fprintf(stderr, "Read error: %s\n", strerror(errno)); fprintf(stderr, "Read error: %s\n", strerror(errno));
throw "BAD"; // Just exit the program throw "BAD"; // Just exit the program
} }
@ -97,10 +107,12 @@ void GSDumpLzma::Decompress() {
lzma_ret ret = lzma_code(&m_strm, action); lzma_ret ret = lzma_code(&m_strm, action);
if (ret != LZMA_OK) { if (ret != LZMA_OK)
{
if (ret == LZMA_STREAM_END) if (ret == LZMA_STREAM_END)
fprintf(stderr, "LZMA decoder finished without error\n\n"); fprintf(stderr, "LZMA decoder finished without error\n\n");
else { else
{
fprintf(stderr, "Decoder error: (error code %u)\n", ret); fprintf(stderr, "Decoder error: (error code %u)\n", ret);
throw "BAD"; // Just exit the program throw "BAD"; // Just exit the program
} }
@ -110,16 +122,20 @@ void GSDumpLzma::Decompress() {
m_avail = m_buff_size - m_strm.avail_out; m_avail = m_buff_size - m_strm.avail_out;
} }
bool GSDumpLzma::IsEof() { bool GSDumpLzma::IsEof()
{
return feof(m_fp) && m_avail == 0 && m_strm.avail_in == 0; return feof(m_fp) && m_avail == 0 && m_strm.avail_in == 0;
} }
bool GSDumpLzma::Read(void* ptr, size_t size) { bool GSDumpLzma::Read(void* ptr, size_t size)
{
size_t off = 0; size_t off = 0;
uint8_t* dst = (uint8_t*)ptr; uint8_t* dst = (uint8_t*)ptr;
size_t full_size = size; size_t full_size = size;
while (size && !IsEof()) { while (size && !IsEof())
if (m_avail == 0) { {
if (m_avail == 0)
{
Decompress(); Decompress();
} }
@ -131,7 +147,8 @@ bool GSDumpLzma::Read(void* ptr, size_t size) {
off += l; off += l;
} }
if (size == 0) { if (size == 0)
{
Repack(ptr, full_size); Repack(ptr, full_size);
return true; return true;
} }
@ -139,7 +156,8 @@ bool GSDumpLzma::Read(void* ptr, size_t size) {
return false; return false;
} }
GSDumpLzma::~GSDumpLzma() { GSDumpLzma::~GSDumpLzma()
{
lzma_end(&m_strm); lzma_end(&m_strm);
if (m_inbuf) if (m_inbuf)
@ -150,26 +168,32 @@ GSDumpLzma::~GSDumpLzma() {
/******************************************************************/ /******************************************************************/
GSDumpRaw::GSDumpRaw(char* filename, const char* repack_filename) : GSDumpFile(filename, repack_filename) { GSDumpRaw::GSDumpRaw(char* filename, const char* repack_filename)
: GSDumpFile(filename, repack_filename)
{
m_buff_size = 0; m_buff_size = 0;
m_area = NULL; m_area = nullptr;
m_inbuf = NULL; m_inbuf = nullptr;
m_avail = 0; m_avail = 0;
m_start = 0; m_start = 0;
} }
bool GSDumpRaw::IsEof() { bool GSDumpRaw::IsEof()
{
return !!feof(m_fp); return !!feof(m_fp);
} }
bool GSDumpRaw::Read(void* ptr, size_t size) { bool GSDumpRaw::Read(void* ptr, size_t size)
{
size_t ret = fread(ptr, 1, size, m_fp); size_t ret = fread(ptr, 1, size, m_fp);
if (ret != size && ferror(m_fp)) { if (ret != size && ferror(m_fp))
{
fprintf(stderr, "GSDumpRaw:: Read error (%zu/%zu)\n", ret, size); fprintf(stderr, "GSDumpRaw:: Read error (%zu/%zu)\n", ret, size);
throw "BAD"; // Just exit the program throw "BAD"; // Just exit the program
} }
if (ret == size) { if (ret == size)
{
Repack(ptr, size); Repack(ptr, size);
return true; return true;
} }

View File

@ -20,7 +20,8 @@
#include <lzma.h> #include <lzma.h>
class GSDumpFile { class GSDumpFile
{
FILE* m_repack_fp; FILE* m_repack_fp;
protected: protected:
@ -36,8 +37,8 @@ class GSDumpFile {
virtual ~GSDumpFile(); virtual ~GSDumpFile();
}; };
class GSDumpLzma : public GSDumpFile { class GSDumpLzma : public GSDumpFile
{
lzma_stream m_strm; lzma_stream m_strm;
size_t m_buff_size; size_t m_buff_size;
@ -50,7 +51,6 @@ class GSDumpLzma : public GSDumpFile {
void Decompress(); void Decompress();
public: public:
GSDumpLzma(char* filename, const char* repack_filename); GSDumpLzma(char* filename, const char* repack_filename);
virtual ~GSDumpLzma(); virtual ~GSDumpLzma();
@ -58,8 +58,8 @@ class GSDumpLzma : public GSDumpFile {
bool Read(void* ptr, size_t size) final; bool Read(void* ptr, size_t size) final;
}; };
class GSDumpRaw : public GSDumpFile { class GSDumpRaw : public GSDumpFile
{
size_t m_buff_size; size_t m_buff_size;
uint8_t* m_area; uint8_t* m_area;
uint8_t* m_inbuf; uint8_t* m_inbuf;
@ -68,7 +68,6 @@ class GSDumpRaw : public GSDumpFile {
size_t m_start; size_t m_start;
public: public:
GSDumpRaw(char* filename, const char* repack_filename); GSDumpRaw(char* filename, const char* repack_filename);
virtual ~GSDumpRaw() = default; virtual ~GSDumpRaw() = default;

View File

@ -35,7 +35,14 @@ public:
enum counter_t enum counter_t
{ {
Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad, SyncPoint, Frame,
Prim,
Draw,
Swizzle,
Unswizzle,
Fillrate,
Quad,
SyncPoint,
CounterLast, CounterLast,
}; };
@ -70,6 +77,10 @@ class GSPerfMonAutoTimer
int m_timer; int m_timer;
public: public:
GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main) {m_timer = timer; (m_pm = pm)->Start(m_timer);} GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main)
{
m_timer = timer;
(m_pm = pm)->Start(m_timer);
}
~GSPerfMonAutoTimer() { m_pm->Stop(m_timer); } ~GSPerfMonAutoTimer() { m_pm->Stop(m_timer); }
}; };

View File

@ -23,7 +23,8 @@
#include <zlib.h> #include <zlib.h>
#include <png.h> #include <png.h>
struct { struct
{
int type; int type;
int bytes_per_pixel_in; int bytes_per_pixel_in;
int bytes_per_pixel_out; int bytes_per_pixel_out;
@ -39,7 +40,8 @@ struct {
{PNG_COLOR_TYPE_GRAY, 4, 2, 16, {"_R32I_lsb.png", "_R32I_msb.png"}}, // R32I_PNG {PNG_COLOR_TYPE_GRAY, 4, 2, 16, {"_R32I_lsb.png", "_R32I_msb.png"}}, // R32I_PNG
}; };
namespace GSPng { namespace GSPng
{
bool SaveFile(const std::string& file, const Format fmt, const uint8* const image, bool SaveFile(const std::string& file, const Format fmt, const uint8* const image,
uint8* const row, const int width, const int height, const int pitch, uint8* const row, const int width, const int height, const int pitch,
@ -60,7 +62,8 @@ namespace GSPng {
png_infop info_ptr = nullptr; png_infop info_ptr = nullptr;
bool success; bool success;
try { try
{
if (png_ptr == nullptr) if (png_ptr == nullptr)
throw GSDXRecoverableError(); throw GSDXRecoverableError();
@ -82,7 +85,8 @@ namespace GSPng {
if (rb_swapped && type != PNG_COLOR_TYPE_GRAY) if (rb_swapped && type != PNG_COLOR_TYPE_GRAY)
png_set_bgr(png_ptr); png_set_bgr(png_ptr);
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x) for (int x = 0; x < width; ++x)
for (int i = 0; i < bytes_per_pixel_out; ++i) for (int i = 0; i < bytes_per_pixel_out; ++i)
row[bytes_per_pixel_out * x + i] = image[y * pitch + bytes_per_pixel_in * x + i + offset]; row[bytes_per_pixel_out * x + i] = image[y * pitch + bytes_per_pixel_in * x + i + offset];
@ -91,7 +95,9 @@ namespace GSPng {
png_write_end(png_ptr, nullptr); png_write_end(png_ptr, nullptr);
success = true; success = true;
} catch (GSDXRecoverableError&) { }
catch (GSDXRecoverableError&)
{
fprintf(stderr, "Failed to write image %s\n", file.c_str()); fprintf(stderr, "Failed to write image %s\n", file.c_str());
success = false; success = false;
@ -148,4 +154,4 @@ namespace GSPng {
Save(item->m_fmt, item->m_file, item->m_image, item->m_w, item->m_h, item->m_pitch, item->m_compression); Save(item->m_fmt, item->m_file, item->m_image, item->m_w, item->m_h, item->m_pitch, item->m_compression);
} }
} } // namespace GSPng

View File

@ -22,8 +22,10 @@
#include "GSThread_CXX11.h" #include "GSThread_CXX11.h"
namespace GSPng { namespace GSPng
enum Format { {
enum Format
{
START = 0, START = 0,
RGBA_PNG = 0, RGBA_PNG = 0,
RGB_PNG, RGB_PNG,
@ -55,4 +57,4 @@ namespace GSPng {
void Process(std::shared_ptr<Transaction>& item); void Process(std::shared_ptr<Transaction>& item);
using Worker = GSJobQueue<std::shared_ptr<Transaction>, 16>; using Worker = GSJobQueue<std::shared_ptr<Transaction>, 16>;
} } // namespace GSPng

View File

@ -274,11 +274,14 @@ public:
template<int index> void Transfer(const uint8* mem, uint32 size); template<int index> void Transfer(const uint8* mem, uint32 size);
int Freeze(GSFreezeData* fd, bool sizeonly); int Freeze(GSFreezeData* fd, bool sizeonly);
int Defrost(const GSFreezeData* fd); int Defrost(const GSFreezeData* fd);
void GetLastTag(uint32* tag) {*tag = m_path3hack; m_path3hack = 0;} void GetLastTag(uint32* tag)
{
*tag = m_path3hack;
m_path3hack = 0;
}
virtual void SetGameCRC(uint32 crc, int options); virtual void SetGameCRC(uint32 crc, int options);
void SetFrameSkip(int skip); void SetFrameSkip(int skip);
void SetRegsMem(uint8* basemem); void SetRegsMem(uint8* basemem);
void SetIrqCallback(void (*irq)()); void SetIrqCallback(void (*irq)());
void SetMultithreaded(bool mt = true); void SetMultithreaded(bool mt = true);
}; };

View File

@ -24,7 +24,8 @@
#include "GSdx.h" #include "GSdx.h"
#include "Utilities/boost_spsc_queue.hpp" #include "Utilities/boost_spsc_queue.hpp"
template<class T, int CAPACITY> class GSJobQueue final template <class T, int CAPACITY>
class GSJobQueue final
{ {
private: private:
std::thread m_thread; std::thread m_thread;
@ -37,12 +38,15 @@ private:
std::condition_variable m_empty; std::condition_variable m_empty;
std::condition_variable m_notempty; std::condition_variable m_notempty;
void ThreadProc() { void ThreadProc()
{
std::unique_lock<std::mutex> l(m_lock); std::unique_lock<std::mutex> l(m_lock);
while (true) { while (true)
{
while (m_queue.empty()) { while (m_queue.empty())
{
if (m_exit) if (m_exit)
return; return;
@ -64,9 +68,9 @@ private:
} }
public: public:
GSJobQueue(std::function<void(T&)> func) : GSJobQueue(std::function<void(T&)> func)
m_func(func), : m_func(func)
m_exit(false) , m_exit(false)
{ {
m_thread = std::thread(&GSJobQueue::ThreadProc, this); m_thread = std::thread(&GSJobQueue::ThreadProc, this);
} }
@ -87,7 +91,8 @@ public:
return m_queue.empty(); return m_queue.empty();
} }
void Push(const T& item) { void Push(const T& item)
{
while (!m_queue.push(item)) while (!m_queue.push(item))
std::this_thread::yield(); std::this_thread::yield();
@ -109,7 +114,8 @@ public:
assert(IsEmpty()); assert(IsEmpty());
} }
void operator() (T& item) { void operator()(T& item)
{
m_func(item); m_func(item);
} }
}; };

View File

@ -211,7 +211,8 @@ bool GSUtil::CheckSSE()
{ {
bool status = true; bool status = true;
struct ISA { struct ISA
{
Xbyak::util::Cpu::Type type; Xbyak::util::Cpu::Type type;
const char* name; const char* name;
}; };
@ -228,8 +229,10 @@ bool GSUtil::CheckSSE()
#endif #endif
}; };
for (size_t i = 0; i < countof(checks); i++) { for (size_t i = 0; i < countof(checks); i++)
if(!g_cpu.has(checks[i].type)) { {
if (!g_cpu.has(checks[i].type))
{
fprintf(stderr, "This CPU does not support %s\n", checks[i].name); fprintf(stderr, "This CPU does not support %s\n", checks[i].name);
status = false; status = false;
@ -323,9 +326,11 @@ GSRendererType GSUtil::GetBestRenderer()
#ifdef _WIN32 #ifdef _WIN32
void GSmkdir(const wchar_t* dir) void GSmkdir(const wchar_t* dir)
{ {
if (!CreateDirectory(dir, nullptr)) { if (!CreateDirectory(dir, nullptr))
{
DWORD errorID = ::GetLastError(); DWORD errorID = ::GetLastError();
if (errorID != ERROR_ALREADY_EXISTS) { if (errorID != ERROR_ALREADY_EXISTS)
{
fprintf(stderr, "Failed to create directory: %ls error %u\n", dir, errorID); fprintf(stderr, "Failed to create directory: %ls error %u\n", dir, errorID);
} }
} }
@ -355,7 +360,8 @@ std::string GStempdir()
const char* psm_str(int psm) const char* psm_str(int psm)
{ {
switch(psm) { switch (psm)
{
// Normal color // Normal color
case PSM_PSMCT32: return "C_32"; case PSM_PSMCT32: return "C_32";
case PSM_PSMCT24: return "C_24"; case PSM_PSMCT24: return "C_24";

View File

@ -177,14 +177,16 @@ GSVector4i GSVector4i::fit(int arx, int ary) const
{ {
w = h * arx / ary; w = h * arx / ary;
r.left = (r.left + r.right - w) >> 1; r.left = (r.left + r.right - w) >> 1;
if(r.left & 1) r.left++; if (r.left & 1)
r.left++;
r.right = r.left + w; r.right = r.left + w;
} }
else else
{ {
h = w * ary / arx; h = w * ary / arx;
r.top = (r.top + r.bottom - h) >> 1; r.top = (r.top + r.bottom - h) >> 1;
if(r.top & 1) r.top++; if (r.top & 1)
r.top++;
r.bottom = r.top + h; r.bottom = r.top + h;
} }

View File

@ -41,7 +41,8 @@ enum Round_Mode
#pragma pack(push, 1) #pragma pack(push, 1)
template<class T> class GSVector2T template <class T>
class GSVector2T
{ {
public: public:
union union

View File

@ -248,7 +248,8 @@ public:
return (v + v) - (v * v) * *this; return (v + v) - (v * v) * *this;
} }
template<int mode> __forceinline GSVector4 round() const template <int mode>
__forceinline GSVector4 round() const
{ {
return GSVector4(_mm_round_ps(m, mode)); return GSVector4(_mm_round_ps(m, mode));
} }
@ -396,7 +397,8 @@ public:
return GSVector4(_mm_hsub_ps(m, v.m)); return GSVector4(_mm_hsub_ps(m, v.m));
} }
template<int i> __forceinline GSVector4 dp(const GSVector4& v) const template <int i>
__forceinline GSVector4 dp(const GSVector4& v) const
{ {
return GSVector4(_mm_dp_ps(m, v.m, i)); return GSVector4(_mm_dp_ps(m, v.m, i));
} }
@ -431,7 +433,8 @@ public:
return GSVector4(_mm_max_ps(m, a)); return GSVector4(_mm_max_ps(m, a));
} }
template<int mask> __forceinline GSVector4 blend32(const GSVector4& a) const template <int mask>
__forceinline GSVector4 blend32(const GSVector4& a) const
{ {
return GSVector4(_mm_blend_ps(m, a, mask)); return GSVector4(_mm_blend_ps(m, a, mask));
} }
@ -506,7 +509,8 @@ public:
return v.blend32(*this, *this == *this); return v.blend32(*this, *this == *this);
} }
template<int src, int dst> __forceinline GSVector4 insert32(const GSVector4& v) const template <int src, int dst>
__forceinline GSVector4 insert32(const GSVector4& v) const
{ {
// TODO: use blendps when src == dst // TODO: use blendps when src == dst
@ -582,12 +586,14 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
template<int i> __forceinline int extract32() const template<int i> __forceinline int extract32() const
#endif #endif
template<int index> __forceinline int extract32() const template <int index>
__forceinline int extract32() const
{ {
return _mm_extract_ps(m, index); return _mm_extract_ps(m, index);
} }
#else #else
template<int i> __forceinline int extract32() const template <int i>
__forceinline int extract32() const
{ {
return _mm_extract_ps(m, i); return _mm_extract_ps(m, i);
} }
@ -630,7 +636,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31))); return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
} }
template<bool aligned> __forceinline static GSVector4 load(const void* p) template <bool aligned>
__forceinline static GSVector4 load(const void* p)
{ {
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p)); return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));
} }
@ -650,10 +657,13 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
_mm_storeh_pd((double*)p, _mm_castps_pd(v.m)); _mm_storeh_pd((double*)p, _mm_castps_pd(v.m));
} }
template<bool aligned> __forceinline static void store(void* p, const GSVector4& v) template <bool aligned>
__forceinline static void store(void* p, const GSVector4& v)
{ {
if(aligned) _mm_store_ps((float*)p, v.m); if (aligned)
else _mm_storeu_ps((float*)p, v.m); _mm_store_ps((float*)p, v.m);
else
_mm_storeu_ps((float*)p, v.m);
} }
__forceinline static void store(float* p, const GSVector4& v) __forceinline static void store(float* p, const GSVector4& v)
@ -710,7 +720,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
b = v2.h2l(v0); b = v2.h2l(v0);
c = v1.l2h(v3); c = v1.l2h(v3);
d = v3.h2l(v1); d = v3.h2l(v1);
*/ } */
}
__forceinline GSVector4 operator-() const __forceinline GSVector4 operator-() const
{ {

View File

@ -70,7 +70,8 @@ public:
__m128i m; __m128i m;
}; };
__forceinline constexpr GSVector4i(): x(0), y(0), z(0), w(0) __forceinline constexpr GSVector4i()
: x(0), y(0), z(0), w(0)
{ {
} }
@ -237,7 +238,8 @@ public:
return sat_i32(a); return sat_i32(a);
} }
template<int mode> __forceinline GSVector4i ralign(const GSVector2i& a) const template <int mode>
__forceinline GSVector4i ralign(const GSVector2i& a) const
{ {
// a must be 1 << n // a must be 1 << n
@ -422,14 +424,16 @@ public:
return GSVector4i(_mm_blendv_epi8(m, a, mask)); return GSVector4i(_mm_blendv_epi8(m, a, mask));
} }
template<int mask> __forceinline GSVector4i blend16(const GSVector4i& a) const template <int mask>
__forceinline GSVector4i blend16(const GSVector4i& a) const
{ {
return GSVector4i(_mm_blend_epi16(m, a, mask)); return GSVector4i(_mm_blend_epi16(m, a, mask));
} }
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
template<int mask> __forceinline GSVector4i blend32(const GSVector4i& v) const template <int mask>
__forceinline GSVector4i blend32(const GSVector4i& v) const
{ {
return GSVector4i(_mm_blend_epi32(m, v.m, mask)); return GSVector4i(_mm_blend_epi32(m, v.m, mask));
} }
@ -660,17 +664,20 @@ public:
return GSVector4i(_mm_cvtepu32_epi64(m)); return GSVector4i(_mm_cvtepu32_epi64(m));
} }
template<int i> __forceinline GSVector4i srl() const template <int i>
__forceinline GSVector4i srl() const
{ {
return GSVector4i(_mm_srli_si128(m, i)); return GSVector4i(_mm_srli_si128(m, i));
} }
template<int i> __forceinline GSVector4i srl(const GSVector4i& v) template <int i>
__forceinline GSVector4i srl(const GSVector4i& v)
{ {
return GSVector4i(_mm_alignr_epi8(v.m, m, i)); return GSVector4i(_mm_alignr_epi8(v.m, m, i));
} }
template<int i> __forceinline GSVector4i sll() const template <int i>
__forceinline GSVector4i sll() const
{ {
return GSVector4i(_mm_slli_si128(m, i)); return GSVector4i(_mm_slli_si128(m, i));
} }
@ -874,21 +881,24 @@ public:
return GSVector4i(_mm_madd_epi16(m, v.m)); return GSVector4i(_mm_madd_epi16(m, v.m));
} }
template<int shift> __forceinline GSVector4i lerp16(const GSVector4i& a, const GSVector4i& f) const template <int shift>
__forceinline GSVector4i lerp16(const GSVector4i& a, const GSVector4i& f) const
{ {
// (a - this) * f << shift + this // (a - this) * f << shift + this
return add16(a.sub16(*this).modulate16<shift>(f)); return add16(a.sub16(*this).modulate16<shift>(f));
} }
template<int shift> __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c) template <int shift>
__forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c)
{ {
// (a - b) * c << shift // (a - b) * c << shift
return a.sub16(b).modulate16<shift>(c); return a.sub16(b).modulate16<shift>(c);
} }
template<int shift> __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c, const GSVector4i& d) template <int shift>
__forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c, const GSVector4i& d)
{ {
// (a - b) * c << shift + d // (a - b) * c << shift + d
@ -902,7 +912,8 @@ public:
return add16(a.sub16(*this).mul16l(f).sra16(4)); return add16(a.sub16(*this).mul16l(f).sra16(4));
} }
template<int shift> __forceinline GSVector4i modulate16(const GSVector4i& f) const template <int shift>
__forceinline GSVector4i modulate16(const GSVector4i& f) const
{ {
// a * f << shift // a * f << shift
@ -1007,56 +1018,66 @@ public:
return _mm_testz_si128(m, m) != 0; return _mm_testz_si128(m, m) != 0;
} }
template<int i> __forceinline GSVector4i insert8(int a) const template <int i>
__forceinline GSVector4i insert8(int a) const
{ {
return GSVector4i(_mm_insert_epi8(m, a, i)); return GSVector4i(_mm_insert_epi8(m, a, i));
} }
template<int i> __forceinline int extract8() const template <int i>
__forceinline int extract8() const
{ {
return _mm_extract_epi8(m, i); return _mm_extract_epi8(m, i);
} }
template<int i> __forceinline GSVector4i insert16(int a) const template <int i>
__forceinline GSVector4i insert16(int a) const
{ {
return GSVector4i(_mm_insert_epi16(m, a, i)); return GSVector4i(_mm_insert_epi16(m, a, i));
} }
template<int i> __forceinline int extract16() const template <int i>
__forceinline int extract16() const
{ {
return _mm_extract_epi16(m, i); return _mm_extract_epi16(m, i);
} }
template<int i> __forceinline GSVector4i insert32(int a) const template <int i>
__forceinline GSVector4i insert32(int a) const
{ {
return GSVector4i(_mm_insert_epi32(m, a, i)); return GSVector4i(_mm_insert_epi32(m, a, i));
} }
template<int i> __forceinline int extract32() const template <int i>
__forceinline int extract32() const
{ {
if(i == 0) return GSVector4i::store(*this); if (i == 0)
return GSVector4i::store(*this);
return _mm_extract_epi32(m, i); return _mm_extract_epi32(m, i);
} }
#ifdef _M_AMD64 #ifdef _M_AMD64
template<int i> __forceinline GSVector4i insert64(int64 a) const template <int i>
__forceinline GSVector4i insert64(int64 a) const
{ {
return GSVector4i(_mm_insert_epi64(m, a, i)); return GSVector4i(_mm_insert_epi64(m, a, i));
} }
template<int i> __forceinline int64 extract64() const template <int i>
__forceinline int64 extract64() const
{ {
if(i == 0) return GSVector4i::storeq(*this); if (i == 0)
return GSVector4i::storeq(*this);
return _mm_extract_epi64(m, i); return _mm_extract_epi64(m, i);
} }
#endif #endif
template <int src, class T>
template<int src, class T> __forceinline GSVector4i gather8_4(const T* ptr) const __forceinline GSVector4i gather8_4(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1080,7 +1101,8 @@ public:
return v; return v;
} }
template<class T> __forceinline GSVector4i gather8_8(const T* ptr) const template <class T>
__forceinline GSVector4i gather8_8(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1104,7 +1126,8 @@ public:
return v; return v;
} }
template<int dst, class T> __forceinline GSVector4i gather8_16(const T* ptr, const GSVector4i& a) const template <int dst, class T>
__forceinline GSVector4i gather8_16(const T* ptr, const GSVector4i& a) const
{ {
GSVector4i v = a; GSVector4i v = a;
@ -1120,7 +1143,8 @@ public:
return v; return v;
} }
template<int dst, class T> __forceinline GSVector4i gather8_32(const T* ptr, const GSVector4i& a) const template <int dst, class T>
__forceinline GSVector4i gather8_32(const T* ptr, const GSVector4i& a) const
{ {
GSVector4i v = a; GSVector4i v = a;
@ -1132,7 +1156,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather16_4(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather16_4(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1148,7 +1173,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather16_8(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather16_8(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1164,7 +1190,8 @@ public:
return v; return v;
} }
template<class T>__forceinline GSVector4i gather16_16(const T* ptr) const template <class T>
__forceinline GSVector4i gather16_16(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1180,7 +1207,8 @@ public:
return v; return v;
} }
template<class T1, class T2>__forceinline GSVector4i gather16_16(const T1* ptr1, const T2* ptr2) const template <class T1, class T2>
__forceinline GSVector4i gather16_16(const T1* ptr1, const T2* ptr2) const
{ {
GSVector4i v; GSVector4i v;
@ -1196,7 +1224,8 @@ public:
return v; return v;
} }
template<int dst, class T> __forceinline GSVector4i gather16_32(const T* ptr, const GSVector4i& a) const template <int dst, class T>
__forceinline GSVector4i gather16_32(const T* ptr, const GSVector4i& a) const
{ {
GSVector4i v = a; GSVector4i v = a;
@ -1208,7 +1237,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather32_4(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather32_4(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1219,7 +1249,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather32_8(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather32_8(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1231,7 +1262,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather32_16(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather32_16(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1243,7 +1275,8 @@ public:
return v; return v;
} }
template<class T> __forceinline GSVector4i gather32_32(const T* ptr) const template <class T>
__forceinline GSVector4i gather32_32(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1255,7 +1288,8 @@ public:
return v; return v;
} }
template<class T1, class T2> __forceinline GSVector4i gather32_32(const T1* ptr1, const T2* ptr2) const template <class T1, class T2>
__forceinline GSVector4i gather32_32(const T1* ptr1, const T2* ptr2) const
{ {
GSVector4i v; GSVector4i v;
@ -1269,7 +1303,8 @@ public:
#if defined(_M_AMD64) #if defined(_M_AMD64)
template<int src, class T> __forceinline GSVector4i gather64_4(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_4(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1279,7 +1314,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather64_8(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_8(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1289,7 +1325,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather64_16(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_16(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1299,7 +1336,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather64_32(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_32(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1309,7 +1347,8 @@ public:
return v; return v;
} }
template<class T> __forceinline GSVector4i gather64_64(const T* ptr) const template <class T>
__forceinline GSVector4i gather64_64(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1321,7 +1360,8 @@ public:
#else #else
template<int src, class T> __forceinline GSVector4i gather64_4(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_4(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1330,7 +1370,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather64_8(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_8(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1339,7 +1380,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather64_16(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_16(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1348,7 +1390,8 @@ public:
return v; return v;
} }
template<int src, class T> __forceinline GSVector4i gather64_32(const T* ptr) const template <int src, class T>
__forceinline GSVector4i gather64_32(const T* ptr) const
{ {
GSVector4i v; GSVector4i v;
@ -1359,7 +1402,8 @@ public:
#endif #endif
template<class T> __forceinline void gather8_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather8_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather8_4<0>(ptr); dst[0] = gather8_4<0>(ptr);
dst[1] = gather8_4<8>(ptr); dst[1] = gather8_4<8>(ptr);
@ -1370,7 +1414,8 @@ public:
dst[0] = gather8_8<>(ptr); dst[0] = gather8_8<>(ptr);
} }
template<class T> __forceinline void gather16_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather16_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather16_4<0>(ptr); dst[0] = gather16_4<0>(ptr);
dst[1] = gather16_4<4>(ptr); dst[1] = gather16_4<4>(ptr);
@ -1378,18 +1423,21 @@ public:
dst[3] = gather16_4<12>(ptr); dst[3] = gather16_4<12>(ptr);
} }
template<class T> __forceinline void gather16_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather16_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather16_8<0>(ptr); dst[0] = gather16_8<0>(ptr);
dst[1] = gather16_8<8>(ptr); dst[1] = gather16_8<8>(ptr);
} }
template<class T> __forceinline void gather16_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather16_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather16_16<>(ptr); dst[0] = gather16_16<>(ptr);
} }
template<class T> __forceinline void gather32_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather32_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather32_4<0>(ptr); dst[0] = gather32_4<0>(ptr);
dst[1] = gather32_4<2>(ptr); dst[1] = gather32_4<2>(ptr);
@ -1401,7 +1449,8 @@ public:
dst[7] = gather32_4<14>(ptr); dst[7] = gather32_4<14>(ptr);
} }
template<class T> __forceinline void gather32_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather32_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather32_8<0>(ptr); dst[0] = gather32_8<0>(ptr);
dst[1] = gather32_8<4>(ptr); dst[1] = gather32_8<4>(ptr);
@ -1409,18 +1458,21 @@ public:
dst[3] = gather32_8<12>(ptr); dst[3] = gather32_8<12>(ptr);
} }
template<class T> __forceinline void gather32_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather32_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather32_16<0>(ptr); dst[0] = gather32_16<0>(ptr);
dst[1] = gather32_16<4>(ptr); dst[1] = gather32_16<4>(ptr);
} }
template<class T> __forceinline void gather32_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather32_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather32_32<>(ptr); dst[0] = gather32_32<>(ptr);
} }
template<class T> __forceinline void gather64_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather64_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather64_4<0>(ptr); dst[0] = gather64_4<0>(ptr);
dst[1] = gather64_4<1>(ptr); dst[1] = gather64_4<1>(ptr);
@ -1440,7 +1492,8 @@ public:
dst[15] = gather64_4<15>(ptr); dst[15] = gather64_4<15>(ptr);
} }
template<class T> __forceinline void gather64_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather64_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather64_8<0>(ptr); dst[0] = gather64_8<0>(ptr);
dst[1] = gather64_8<2>(ptr); dst[1] = gather64_8<2>(ptr);
@ -1452,7 +1505,8 @@ public:
dst[7] = gather64_8<14>(ptr); dst[7] = gather64_8<14>(ptr);
} }
template<class T> __forceinline void gather64_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather64_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather64_16<0>(ptr); dst[0] = gather64_16<0>(ptr);
dst[1] = gather64_16<2>(ptr); dst[1] = gather64_16<2>(ptr);
@ -1460,7 +1514,8 @@ public:
dst[3] = gather64_16<8>(ptr); dst[3] = gather64_16<8>(ptr);
} }
template<class T> __forceinline void gather64_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather64_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather64_32<0>(ptr); dst[0] = gather64_32<0>(ptr);
dst[1] = gather64_32<2>(ptr); dst[1] = gather64_32<2>(ptr);
@ -1468,7 +1523,8 @@ public:
#ifdef _M_AMD64 #ifdef _M_AMD64
template<class T> __forceinline void gather64_64(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const template <class T>
__forceinline void gather64_64(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const
{ {
dst[0] = gather64_64<>(ptr); dst[0] = gather64_64<>(ptr);
} }
@ -1508,7 +1564,8 @@ public:
return GSVector4i(_mm_unpacklo_epi64(lo, hi)); return GSVector4i(_mm_unpacklo_epi64(lo, hi));
} }
*/ */
template<bool aligned> __forceinline static GSVector4i load(const void* p) template <bool aligned>
__forceinline static GSVector4i load(const void* p)
{ {
return GSVector4i(aligned ? _mm_load_si128((__m128i*)p) : _mm_loadu_si128((__m128i*)p)); return GSVector4i(aligned ? _mm_load_si128((__m128i*)p) : _mm_loadu_si128((__m128i*)p));
} }
@ -1548,10 +1605,13 @@ public:
GSVector4i::storeh(ph, v); GSVector4i::storeh(ph, v);
} }
template<bool aligned> __forceinline static void store(void* p, const GSVector4i& v) template <bool aligned>
__forceinline static void store(void* p, const GSVector4i& v)
{ {
if(aligned) _mm_store_si128((__m128i*)p, v.m); if (aligned)
else _mm_storeu_si128((__m128i*)p, v.m); _mm_store_si128((__m128i*)p, v.m);
else
_mm_storeu_si128((__m128i*)p, v.m);
} }
__forceinline static int store(const GSVector4i& v) __forceinline static int store(const GSVector4i& v)
@ -1573,7 +1633,8 @@ public:
const GSVector4i* s = (const GSVector4i*)src; const GSVector4i* s = (const GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst; GSVector4i* d = (GSVector4i*)dst;
if(size == 0) return; if (size == 0)
return;
size_t i = 0; size_t i = 0;
size_t j = size >> 6; size_t j = size >> 6;
@ -1588,7 +1649,8 @@ public:
size &= 63; size &= 63;
if(size == 0) return; if (size == 0)
return;
memcpy(d, s, size); memcpy(d, s, size);
} }

View File

@ -251,7 +251,8 @@ public:
return (v + v) - (v * v) * *this; return (v + v) - (v * v) * *this;
} }
template<int mode> __forceinline GSVector8 round() const template <int mode>
__forceinline GSVector8 round() const
{ {
return GSVector8(_mm256_round_ps(m, mode)); return GSVector8(_mm256_round_ps(m, mode));
} }
@ -396,7 +397,8 @@ public:
return GSVector8(_mm256_hsub_ps(m, v.m)); return GSVector8(_mm256_hsub_ps(m, v.m));
} }
template<int i> __forceinline GSVector8 dp(const GSVector8& v) const template <int i>
__forceinline GSVector8 dp(const GSVector8& v) const
{ {
return GSVector8(_mm256_dp_ps(m, v.m, i)); return GSVector8(_mm256_dp_ps(m, v.m, i));
} }
@ -431,7 +433,8 @@ public:
return GSVector8(_mm256_max_ps(m, a)); return GSVector8(_mm256_max_ps(m, a));
} }
template<int mask> __forceinline GSVector8 blend32(const GSVector8& a) const template <int mask>
__forceinline GSVector8 blend32(const GSVector8& a) const
{ {
return GSVector8(_mm256_blend_ps(m, a, mask)); return GSVector8(_mm256_blend_ps(m, a, mask));
} }
@ -496,7 +499,8 @@ public:
return v.blend32(*this, *this == *this); return v.blend32(*this, *this == *this);
} }
template<int src, int dst> __forceinline GSVector8 insert32(const GSVector8& v) const template <int src, int dst>
__forceinline GSVector8 insert32(const GSVector8& v) const
{ {
// TODO: use blendps when src == dst // TODO: use blendps when src == dst
@ -551,25 +555,29 @@ public:
return *this; return *this;
} }
template<int i> __forceinline int extract32() const template <int i>
__forceinline int extract32() const
{ {
ASSERT(i < 8); ASSERT(i < 8);
return extract<i / 4>().template extract32<i & 3>(); return extract<i / 4>().template extract32<i & 3>();
} }
template<int i> __forceinline GSVector8 insert(__m128 m) const template <int i>
__forceinline GSVector8 insert(__m128 m) const
{ {
ASSERT(i < 2); ASSERT(i < 2);
return GSVector8(_mm256_insertf128_ps(this->m, m, i)); return GSVector8(_mm256_insertf128_ps(this->m, m, i));
} }
template<int i> __forceinline GSVector4 extract() const template <int i>
__forceinline GSVector4 extract() const
{ {
ASSERT(i < 2); ASSERT(i < 2);
if(i == 0) return GSVector4(_mm256_castps256_ps128(m)); if (i == 0)
return GSVector4(_mm256_castps256_ps128(m));
return GSVector4(_mm256_extractf128_ps(m, i)); return GSVector4(_mm256_extractf128_ps(m, i));
} }
@ -606,7 +614,8 @@ public:
return loadh(ph, loadl(pl)); return loadh(ph, loadl(pl));
} }
template<bool aligned> __forceinline static GSVector8 load(const void* p) template <bool aligned>
__forceinline static GSVector8 load(const void* p)
{ {
return GSVector8(aligned ? _mm256_load_ps((const float*)p) : _mm256_loadu_ps((const float*)p)); return GSVector8(aligned ? _mm256_load_ps((const float*)p) : _mm256_loadu_ps((const float*)p));
} }
@ -623,10 +632,13 @@ public:
_mm_store_ps((float*)p, _mm256_extractf128_ps(v.m, 1)); _mm_store_ps((float*)p, _mm256_extractf128_ps(v.m, 1));
} }
template<bool aligned> __forceinline static void store(void* p, const GSVector8& v) template <bool aligned>
__forceinline static void store(void* p, const GSVector8& v)
{ {
if(aligned) _mm256_store_ps((float*)p, v.m); if (aligned)
else _mm256_storeu_ps((float*)p, v.m); _mm256_store_ps((float*)p, v.m);
else
_mm256_storeu_ps((float*)p, v.m);
} }
// //

View File

@ -316,7 +316,8 @@ public:
return GSVector8i(_mm256_blendv_epi8(m, a, mask)); return GSVector8i(_mm256_blendv_epi8(m, a, mask));
} }
template<int mask> __forceinline GSVector8i blend16(const GSVector8i& a) const template <int mask>
__forceinline GSVector8i blend16(const GSVector8i& a) const
{ {
return GSVector8i(_mm256_blend_epi16(m, a, mask)); return GSVector8i(_mm256_blend_epi16(m, a, mask));
} }
@ -582,17 +583,20 @@ public:
// //
template<int i> __forceinline GSVector8i srl() const template <int i>
__forceinline GSVector8i srl() const
{ {
return GSVector8i(_mm256_srli_si256(m, i)); return GSVector8i(_mm256_srli_si256(m, i));
} }
template<int i> __forceinline GSVector8i srl(const GSVector8i& v) template <int i>
__forceinline GSVector8i srl(const GSVector8i& v)
{ {
return GSVector8i(_mm256_alignr_epi8(v.m, m, i)); return GSVector8i(_mm256_alignr_epi8(v.m, m, i));
} }
template<int i> __forceinline GSVector8i sll() const template <int i>
__forceinline GSVector8i sll() const
{ {
return GSVector8i(_mm256_slli_si256(m, i)); return GSVector8i(_mm256_slli_si256(m, i));
//return GSVector8i(_mm256_slli_si128(m, i)); //return GSVector8i(_mm256_slli_si128(m, i));
@ -848,21 +852,24 @@ public:
return GSVector8i(_mm256_madd_epi16(m, v.m)); return GSVector8i(_mm256_madd_epi16(m, v.m));
} }
template<int shift> __forceinline GSVector8i lerp16(const GSVector8i& a, const GSVector8i& f) const template <int shift>
__forceinline GSVector8i lerp16(const GSVector8i& a, const GSVector8i& f) const
{ {
// (a - this) * f << shift + this // (a - this) * f << shift + this
return add16(a.sub16(*this).modulate16<shift>(f)); return add16(a.sub16(*this).modulate16<shift>(f));
} }
template<int shift> __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c) template <int shift>
__forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c)
{ {
// (a - b) * c << shift // (a - b) * c << shift
return a.sub16(b).modulate16<shift>(c); return a.sub16(b).modulate16<shift>(c);
} }
template<int shift> __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c, const GSVector8i& d) template <int shift>
__forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c, const GSVector8i& d)
{ {
// (a - b) * c << shift + d // (a - b) * c << shift + d
@ -876,7 +883,8 @@ public:
return add16(a.sub16(*this).mul16l(f).sra16(4)); return add16(a.sub16(*this).mul16l(f).sra16(4));
} }
template<int shift> __forceinline GSVector8i modulate16(const GSVector8i& f) const template <int shift>
__forceinline GSVector8i modulate16(const GSVector8i& f) const
{ {
// a * f << shift // a * f << shift
@ -977,7 +985,8 @@ public:
// TODO: extract/insert // TODO: extract/insert
template<int i> __forceinline int extract8() const template <int i>
__forceinline int extract8() const
{ {
ASSERT(i < 32); ASSERT(i < 32);
@ -986,7 +995,8 @@ public:
return v.extract8<i & 15>(); return v.extract8<i & 15>();
} }
template<int i> __forceinline int extract16() const template <int i>
__forceinline int extract16() const
{ {
ASSERT(i < 16); ASSERT(i < 16);
@ -995,27 +1005,32 @@ public:
return v.extract16<i & 8>(); return v.extract16<i & 8>();
} }
template<int i> __forceinline int extract32() const template <int i>
__forceinline int extract32() const
{ {
ASSERT(i < 8); ASSERT(i < 8);
GSVector4i v = extract<i / 4>(); GSVector4i v = extract<i / 4>();
if((i & 3) == 0) return GSVector4i::store(v); if ((i & 3) == 0)
return GSVector4i::store(v);
return v.extract32<i & 3>(); return v.extract32<i & 3>();
} }
template<int i> __forceinline GSVector4i extract() const template <int i>
__forceinline GSVector4i extract() const
{ {
ASSERT(i < 2); ASSERT(i < 2);
if(i == 0) return GSVector4i(_mm256_castsi256_si128(m)); if (i == 0)
return GSVector4i(_mm256_castsi256_si128(m));
return GSVector4i(_mm256_extracti128_si256(m, i)); return GSVector4i(_mm256_extracti128_si256(m, i));
} }
template<int i> __forceinline GSVector8i insert(__m128i m) const template <int i>
__forceinline GSVector8i insert(__m128i m) const
{ {
ASSERT(i < 2); ASSERT(i < 2);
@ -1024,7 +1039,8 @@ public:
// TODO: gather // TODO: gather
template<class T> __forceinline GSVector8i gather32_32(const T* ptr) const template <class T>
__forceinline GSVector8i gather32_32(const T* ptr) const
{ {
GSVector4i v0; GSVector4i v0;
GSVector4i v1; GSVector4i v1;
@ -1060,7 +1076,8 @@ public:
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4)); return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
} }
template<class T1, class T2> __forceinline GSVector8i gather32_32(const T1* ptr1, const T2* ptr2) const template <class T1, class T2>
__forceinline GSVector8i gather32_32(const T1* ptr1, const T2* ptr2) const
{ {
GSVector4i v0; GSVector4i v0;
GSVector4i v1; GSVector4i v1;
@ -1091,7 +1108,8 @@ public:
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2); return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
} }
template<class T> __forceinline void gather32_32(const T* RESTRICT ptr, GSVector8i* RESTRICT dst) const template <class T>
__forceinline void gather32_32(const T* RESTRICT ptr, GSVector8i* RESTRICT dst) const
{ {
dst[0] = gather32_32<>(ptr); dst[0] = gather32_32<>(ptr);
} }
@ -1144,7 +1162,8 @@ public:
// return GSVector8i(l).insert<1>(h); // return GSVector8i(l).insert<1>(h);
} }
template<bool aligned> __forceinline static GSVector8i load(const void* p) template <bool aligned>
__forceinline static GSVector8i load(const void* p)
{ {
return GSVector8i(aligned ? _mm256_load_si256((__m256i*)p) : _mm256_loadu_si256((__m256i*)p)); return GSVector8i(aligned ? _mm256_load_si256((__m256i*)p) : _mm256_loadu_si256((__m256i*)p));
} }
@ -1184,10 +1203,13 @@ public:
GSVector8i::storeh(ph, v); GSVector8i::storeh(ph, v);
} }
template<bool aligned> __forceinline static void store(void* p, const GSVector8i& v) template <bool aligned>
__forceinline static void store(void* p, const GSVector8i& v)
{ {
if(aligned) _mm256_store_si256((__m256i*)p, v.m); if (aligned)
else _mm256_storeu_si256((__m256i*)p, v.m); _mm256_store_si256((__m256i*)p, v.m);
else
_mm256_storeu_si256((__m256i*)p, v.m);
} }
__forceinline static int store(const GSVector8i& v) __forceinline static int store(const GSVector8i& v)
@ -1209,7 +1231,8 @@ public:
const GSVector8i* s = (const GSVector8i*)src; const GSVector8i* s = (const GSVector8i*)src;
GSVector8i* d = (GSVector8i*)dst; GSVector8i* d = (GSVector8i*)dst;
if(size == 0) return; if (size == 0)
return;
size_t i = 0; size_t i = 0;
size_t j = size >> 7; size_t j = size >> 7;
@ -1224,7 +1247,8 @@ public:
size &= 127; size &= 127;
if(size == 0) return; if (size == 0)
return;
memcpy(d, s, size); memcpy(d, s, size);
} }

View File

@ -47,11 +47,14 @@ bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const wchar_t* type)
{ {
buff.clear(); buff.clear();
HRSRC hRsrc = FindResource((HMODULE)s_hModule, MAKEINTRESOURCE(id), type != NULL ? type : (LPWSTR)RT_RCDATA); HRSRC hRsrc = FindResource((HMODULE)s_hModule, MAKEINTRESOURCE(id), type != NULL ? type : (LPWSTR)RT_RCDATA);
if(!hRsrc) return false; if (!hRsrc)
return false;
HGLOBAL hGlobal = ::LoadResource((HMODULE)s_hModule, hRsrc); HGLOBAL hGlobal = ::LoadResource((HMODULE)s_hModule, hRsrc);
if(!hGlobal) return false; if (!hGlobal)
return false;
DWORD size = SizeofResource((HMODULE)s_hModule, hRsrc); DWORD size = SizeofResource((HMODULE)s_hModule, hRsrc);
if(!size) return false; if (!size)
return false;
// On Linux resources are always NULL terminated // On Linux resources are always NULL terminated
// Add + 1 on size to do the same for compatibility sake (required by GSDeviceOGL) // Add + 1 on size to do the same for compatibility sake (required by GSDeviceOGL)
buff.resize(size + 1); buff.resize(size + 1);
@ -66,7 +69,8 @@ bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const wchar_t* type)
bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const char* type) bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const char* type)
{ {
std::string path; std::string path;
switch (id) { switch (id)
{
case IDR_COMMON_GLSL: case IDR_COMMON_GLSL:
path = "/GSdx/res/glsl/common_header.glsl"; path = "/GSdx/res/glsl/common_header.glsl";
break; break;
@ -104,7 +108,8 @@ bool GSdxApp::LoadResource(int id, std::vector<char>& buff, const char* type)
size_t size = 0; size_t size = 0;
const void* data = g_bytes_get_data(bytes, &size); const void* data = g_bytes_get_data(bytes, &size);
if (data == nullptr || size == 0) { if (data == nullptr || size == 0)
{
printf("Failed to get data for resource: %d\n", id); printf("Failed to get data for resource: %d\n", id);
return false; return false;
} }
@ -125,11 +130,13 @@ size_t GSdxApp::GetIniString(const char* lpAppName, const char* lpKeyName, const
std::string key(lpKeyName); std::string key(lpKeyName);
std::string value = m_configuration_map[key]; std::string value = m_configuration_map[key];
if (value.empty()) { if (value.empty())
{
// save the value for futur call // save the value for futur call
m_configuration_map[key] = std::string(lpDefault); m_configuration_map[key] = std::string(lpDefault);
strcpy(lpReturnedString, lpDefault); strcpy(lpReturnedString, lpDefault);
} else }
else
strcpy(lpReturnedString, value.c_str()); strcpy(lpReturnedString, value.c_str());
return 0; return 0;
@ -146,16 +153,19 @@ bool GSdxApp::WriteIniString(const char* lpAppName, const char* lpKeyName, const
// Save config to a file // Save config to a file
FILE* f = px_fopen(lpFileName, "w"); FILE* f = px_fopen(lpFileName, "w");
if (f == NULL) return false; // FIXME print a nice message if (f == NULL)
return false; // FIXME print a nice message
// Maintain compatibility with GSDumpGUI/old Windows ini. // Maintain compatibility with GSDumpGUI/old Windows ini.
#ifdef _WIN32 #ifdef _WIN32
fprintf(f, "[Settings]\n"); fprintf(f, "[Settings]\n");
#endif #endif
for (const auto& entry : m_configuration_map) { for (const auto& entry : m_configuration_map)
{
// Do not save the inifile key which is not an option // Do not save the inifile key which is not an option
if (entry.first.compare("inifile") == 0) continue; if (entry.first.compare("inifile") == 0)
continue;
// Only keep option that have a default value (allow to purge old option of the GSdx.ini) // Only keep option that have a default value (allow to purge old option of the GSdx.ini)
if (!entry.second.empty() && m_default_configuration.find(entry.first) != m_default_configuration.end()) if (!entry.second.empty() && m_default_configuration.find(entry.first) != m_default_configuration.end())
@ -171,11 +181,13 @@ int GSdxApp::GetIniInt(const char* lpAppName, const char* lpKeyName, int nDefaul
BuildConfigurationMap(lpFileName); BuildConfigurationMap(lpFileName);
std::string value = m_configuration_map[std::string(lpKeyName)]; std::string value = m_configuration_map[std::string(lpKeyName)];
if (value.empty()) { if (value.empty())
{
// save the value for futur call // save the value for futur call
SetConfig(lpKeyName, nDefault); SetConfig(lpKeyName, nDefault);
return nDefault; return nDefault;
} else }
else
return atoi(value.c_str()); return atoi(value.c_str());
} }
@ -425,10 +437,12 @@ void GSdxApp::Init()
void GSdxApp::ReloadConfig() void GSdxApp::ReloadConfig()
{ {
if (m_configuration_map.empty()) return; if (m_configuration_map.empty())
return;
auto file = m_configuration_map.find("inifile"); auto file = m_configuration_map.find("inifile");
if (file == m_configuration_map.end()) return; if (file == m_configuration_map.end())
return;
// A map was built so reload it // A map was built so reload it
std::string filename = file->second; std::string filename = file->second;
@ -440,7 +454,8 @@ void GSdxApp::BuildConfigurationMap(const char* lpFileName)
{ {
// Check if the map was already built // Check if the map was already built
std::string inifile_value(lpFileName); std::string inifile_value(lpFileName);
if ( inifile_value.compare(m_configuration_map["inifile"]) == 0 ) return; if (inifile_value.compare(m_configuration_map["inifile"]) == 0)
return;
m_configuration_map["inifile"] = inifile_value; m_configuration_map["inifile"] = inifile_value;
// Load config from file // Load config from file
@ -453,7 +468,8 @@ void GSdxApp::BuildConfigurationMap(const char* lpFileName)
return; return;
std::string line; std::string line;
while (std::getline(file, line)) { while (std::getline(file, line))
{
const auto separator = line.find('='); const auto separator = line.find('=');
if (separator == std::string::npos) if (separator == std::string::npos)
continue; continue;
@ -507,9 +523,12 @@ std::string GSdxApp::GetConfigS(const char* entry)
char buff[4096] = {0}; char buff[4096] = {0};
auto def = m_default_configuration.find(entry); auto def = m_default_configuration.find(entry);
if (def != m_default_configuration.end()) { if (def != m_default_configuration.end())
{
GetIniString(m_section.c_str(), entry, def->second.c_str(), buff, countof(buff), m_ini.c_str()); GetIniString(m_section.c_str(), entry, def->second.c_str(), buff, countof(buff), m_ini.c_str());
} else { }
else
{
fprintf(stderr, "Option %s doesn't have a default value\n", entry); fprintf(stderr, "Option %s doesn't have a default value\n", entry);
GetIniString(m_section.c_str(), entry, "", buff, countof(buff), m_ini.c_str()); GetIniString(m_section.c_str(), entry, "", buff, countof(buff), m_ini.c_str());
} }
@ -526,9 +545,12 @@ int GSdxApp::GetConfigI(const char* entry)
{ {
auto def = m_default_configuration.find(entry); auto def = m_default_configuration.find(entry);
if (def != m_default_configuration.end()) { if (def != m_default_configuration.end())
{
return GetIniInt(m_section.c_str(), entry, std::stoi(def->second), m_ini.c_str()); return GetIniInt(m_section.c_str(), entry, std::stoi(def->second), m_ini.c_str());
} else { }
else
{
fprintf(stderr, "Option %s doesn't have a default value\n", entry); fprintf(stderr, "Option %s doesn't have a default value\n", entry);
return GetIniInt(m_section.c_str(), entry, 0, m_ini.c_str()); return GetIniInt(m_section.c_str(), entry, 0, m_ini.c_str());
} }

View File

@ -39,7 +39,10 @@ public:
void* GetModuleHandlePtr(); void* GetModuleHandlePtr();
#ifdef _WIN32 #ifdef _WIN32
HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();} HMODULE GetModuleHandle()
{
return (HMODULE)GetModuleHandlePtr();
}
#endif #endif
void BuildConfigurationMap(const char* lpFileName); void BuildConfigurationMap(const char* lpFileName);
@ -59,7 +62,10 @@ public:
void SetConfig(const char* entry, int value); void SetConfig(const char* entry, int value);
// Avoid issue with overloading // Avoid issue with overloading
template <typename T> template <typename T>
T GetConfigT(const char* entry) { return static_cast<T>(GetConfigI(entry)); } T GetConfigT(const char* entry)
{
return static_cast<T>(GetConfigI(entry));
}
int GetConfigI(const char* entry); int GetConfigI(const char* entry);
bool GetConfigB(const char* entry); bool GetConfigB(const char* entry);
std::string GetConfigS(const char* entry); std::string GetConfigS(const char* entry);
@ -87,8 +93,14 @@ public:
std::vector<GSSetting> m_gs_tv_shaders; std::vector<GSSetting> m_gs_tv_shaders;
}; };
struct GSDXError {}; struct GSDXError
struct GSDXRecoverableError : GSDXError {}; {
struct GSDXErrorGlVertexArrayTooSmall : GSDXError {}; };
struct GSDXRecoverableError : GSDXError
{
};
struct GSDXErrorGlVertexArrayTooSmall : GSDXError
{
};
extern GSdxApp theApp; extern GSdxApp theApp;

View File

@ -42,7 +42,8 @@ GSDevice::GSDevice()
GSDevice::~GSDevice() GSDevice::~GSDevice()
{ {
for(auto t : m_pool) delete t; for (auto t : m_pool)
delete t;
delete m_backbuffer; delete m_backbuffer;
delete m_merge; delete m_merge;
@ -60,7 +61,8 @@ bool GSDevice::Create(const std::shared_ptr<GSWnd>& wnd)
bool GSDevice::Reset(int w, int h) bool GSDevice::Reset(int w, int h)
{ {
for(auto t : m_pool) delete t; for (auto t : m_pool)
delete t;
m_pool.clear(); m_pool.clear();
@ -364,7 +366,11 @@ void GSDevice::ShadeBoost()
bool GSDevice::ResizeTexture(GSTexture** t, int type, int w, int h) bool GSDevice::ResizeTexture(GSTexture** t, int type, int w, int h)
{ {
if(t == NULL) {ASSERT(0); return false;} if (t == NULL)
{
ASSERT(0);
return false;
}
GSTexture* t2 = *t; GSTexture* t2 = *t;

View File

@ -124,7 +124,10 @@ enum HWBlendFlags
}; };
// Determines the HW blend function for DX11/OGL // Determines the HW blend function for DX11/OGL
struct HWBlend { uint16 flags, op, src, dst; }; struct HWBlend
{
uint16 flags, op, src, dst;
};
class GSDevice : public GSAlignedClass<32> class GSDevice : public GSAlignedClass<32>
{ {
@ -157,8 +160,14 @@ protected:
GSTexture* m_blend; GSTexture* m_blend;
GSTexture* m_target_tmp; GSTexture* m_target_tmp;
GSTexture* m_current; GSTexture* m_current;
struct {size_t stride, start, count, limit;} m_vertex; struct
struct {size_t start, count, limit;} m_index; {
size_t stride, start, count, limit;
} m_vertex;
struct
{
size_t start, count, limit;
} m_index;
unsigned int m_frame; // for ageing the pool unsigned int m_frame; // for ageing the pool
bool m_linear_present; bool m_linear_present;
@ -180,7 +189,12 @@ public:
void Recycle(GSTexture* t); void Recycle(GSTexture* t);
enum {Windowed, Fullscreen, DontCare}; enum
{
Windowed,
Fullscreen,
DontCare
};
virtual bool Create(const std::shared_ptr<GSWnd>& wnd); virtual bool Create(const std::shared_ptr<GSWnd>& wnd);
virtual bool Reset(int w, int h); virtual bool Reset(int w, int h);

View File

@ -23,7 +23,8 @@
#pragma once #pragma once
template <class T> template <class T>
struct Element { struct Element
{
T data; T data;
uint16 next_index; uint16 next_index;
uint16 prev_index; uint16 prev_index;
@ -33,8 +34,10 @@ template <class T>
class FastListIterator; class FastListIterator;
template <class T> template <class T>
class FastList { class FastList
{
friend class FastListIterator<T>; friend class FastListIterator<T>;
private: private:
// The index of the first element of the list is m_buffer[0].next_index // The index of the first element of the list is m_buffer[0].next_index
// The first Element<T> of the list has prev_index equal to 0 // The first Element<T> of the list has prev_index equal to 0
@ -56,16 +59,19 @@ private:
uint16* m_free_indexes_stack; uint16* m_free_indexes_stack;
public: public:
__forceinline FastList() { __forceinline FastList()
{
m_buffer = nullptr; m_buffer = nullptr;
clear(); clear();
} }
__forceinline ~FastList() { __forceinline ~FastList()
{
_aligned_free(m_buffer); _aligned_free(m_buffer);
} }
void clear() { void clear()
{
// Initialize m_capacity to 4 so we avoid to Grow() on initial insertions // Initialize m_capacity to 4 so we avoid to Grow() on initial insertions
// The code doesn't break if this value is changed with anything from 1 to USHRT_MAX // The code doesn't break if this value is changed with anything from 1 to USHRT_MAX
m_capacity = 4; m_capacity = 4;
@ -83,14 +89,17 @@ public:
m_free_indexes_stack_top = 0; m_free_indexes_stack_top = 0;
// m_buffer index 0 is reserved for auxiliary element // m_buffer index 0 is reserved for auxiliary element
for (uint16 i = 0; i < m_capacity - 1; i++) { for (uint16 i = 0; i < m_capacity - 1; i++)
{
m_free_indexes_stack[i] = i + 1; m_free_indexes_stack[i] = i + 1;
} }
} }
// Insert the element in front of the list and return its position in m_buffer // Insert the element in front of the list and return its position in m_buffer
__forceinline uint16 InsertFront(const T& data) { __forceinline uint16 InsertFront(const T& data)
if (Full()) { {
if (Full())
{
Grow(); Grow();
} }
@ -101,81 +110,99 @@ public:
return free_index; return free_index;
} }
__forceinline void push_front(const T& data) { __forceinline void push_front(const T& data)
{
InsertFront(data); InsertFront(data);
} }
__forceinline const T& back() const { __forceinline const T& back() const
{
return m_buffer[LastIndex()].data; return m_buffer[LastIndex()].data;
} }
__forceinline void pop_back() { __forceinline void pop_back()
{
EraseIndex(LastIndex()); EraseIndex(LastIndex());
} }
__forceinline uint16 size() const { __forceinline uint16 size() const
{
return m_free_indexes_stack_top; return m_free_indexes_stack_top;
} }
__forceinline bool empty() const { __forceinline bool empty() const
{
return size() == 0; return size() == 0;
} }
__forceinline void EraseIndex(const uint16 index) { __forceinline void EraseIndex(const uint16 index)
{
ListRemove(index); ListRemove(index);
m_free_indexes_stack[--m_free_indexes_stack_top] = index; m_free_indexes_stack[--m_free_indexes_stack_top] = index;
} }
__forceinline void MoveFront(const uint16 index) { __forceinline void MoveFront(const uint16 index)
if (FirstIndex() != index) { {
if (FirstIndex() != index)
{
ListRemove(index); ListRemove(index);
ListInsertFront(index); ListInsertFront(index);
} }
} }
__forceinline const FastListIterator<T> begin() const { __forceinline const FastListIterator<T> begin() const
{
return FastListIterator<T>(this, FirstIndex()); return FastListIterator<T>(this, FirstIndex());
} }
__forceinline const FastListIterator<T> end() const { __forceinline const FastListIterator<T> end() const
{
return FastListIterator<T>(this, 0); return FastListIterator<T>(this, 0);
} }
__forceinline FastListIterator<T> erase(FastListIterator<T> i) { __forceinline FastListIterator<T> erase(FastListIterator<T> i)
{
EraseIndex(i.Index()); EraseIndex(i.Index());
return ++i; return ++i;
} }
private: private:
// Accessed by FastListIterator<T> using class friendship // Accessed by FastListIterator<T> using class friendship
__forceinline const T& Data(const uint16 index) const { __forceinline const T& Data(const uint16 index) const
{
return m_buffer[index].data; return m_buffer[index].data;
} }
// Accessed by FastListIterator<T> using class friendship // Accessed by FastListIterator<T> using class friendship
__forceinline uint16 NextIndex(const uint16 index) const { __forceinline uint16 NextIndex(const uint16 index) const
{
return m_buffer[index].next_index; return m_buffer[index].next_index;
} }
// Accessed by FastListIterator<T> using class friendship // Accessed by FastListIterator<T> using class friendship
__forceinline uint16 PrevIndex(const uint16 index) const { __forceinline uint16 PrevIndex(const uint16 index) const
{
return m_buffer[index].prev_index; return m_buffer[index].prev_index;
} }
__forceinline uint16 FirstIndex() const { __forceinline uint16 FirstIndex() const
{
return m_buffer[0].next_index; return m_buffer[0].next_index;
} }
__forceinline uint16 LastIndex() const { __forceinline uint16 LastIndex() const
{
return m_buffer[0].prev_index; return m_buffer[0].prev_index;
} }
__forceinline bool Full() const { __forceinline bool Full() const
{
// The minus one is due to the presence of the auxiliary element // The minus one is due to the presence of the auxiliary element
return size() == m_capacity - 1; return size() == m_capacity - 1;
} }
__forceinline void ListInsertFront(const uint16 index) { __forceinline void ListInsertFront(const uint16 index)
{
// Update prev / next indexes to add m_buffer[index] to the chain // Update prev / next indexes to add m_buffer[index] to the chain
Element<T>& head = m_buffer[0]; Element<T>& head = m_buffer[0];
m_buffer[index].prev_index = 0; m_buffer[index].prev_index = 0;
@ -184,15 +211,18 @@ private:
head.next_index = index; head.next_index = index;
} }
__forceinline void ListRemove(const uint16 index) { __forceinline void ListRemove(const uint16 index)
{
// Update prev / next indexes to remove m_buffer[index] from the chain // Update prev / next indexes to remove m_buffer[index] from the chain
const Element<T>& to_remove = m_buffer[index]; const Element<T>& to_remove = m_buffer[index];
m_buffer[to_remove.prev_index].next_index = to_remove.next_index; m_buffer[to_remove.prev_index].next_index = to_remove.next_index;
m_buffer[to_remove.next_index].prev_index = to_remove.prev_index; m_buffer[to_remove.next_index].prev_index = to_remove.prev_index;
} }
void Grow() { void Grow()
if (m_capacity == USHRT_MAX) { {
if (m_capacity == USHRT_MAX)
{
throw std::runtime_error("FastList size maxed out at USHRT_MAX (65535) elements, cannot grow futhermore."); throw std::runtime_error("FastList size maxed out at USHRT_MAX (65535) elements, cannot grow futhermore.");
} }
@ -210,7 +240,8 @@ private:
m_free_indexes_stack = new_free_indexes_stack; m_free_indexes_stack = new_free_indexes_stack;
// Initialize the additional space in the stack // Initialize the additional space in the stack
for (uint16 i = m_capacity - 1; i < new_capacity - 1; i++) { for (uint16 i = m_capacity - 1; i < new_capacity - 1; i++)
{
m_free_indexes_stack[i] = i + 1; m_free_indexes_stack[i] = i + 1;
} }
@ -228,50 +259,59 @@ private:
uint16 m_index; uint16 m_index;
public: public:
__forceinline FastListIterator(const FastList<T>* fastlist, const uint16 index) { __forceinline FastListIterator(const FastList<T>* fastlist, const uint16 index)
{
m_fastlist = fastlist; m_fastlist = fastlist;
m_index = index; m_index = index;
} }
__forceinline bool operator!=(const FastListIterator<T>& other) const { __forceinline bool operator!=(const FastListIterator<T>& other) const
{
return (m_index != other.m_index); return (m_index != other.m_index);
} }
__forceinline bool operator==(const FastListIterator<T>& other) const { __forceinline bool operator==(const FastListIterator<T>& other) const
{
return (m_index == other.m_index); return (m_index == other.m_index);
} }
// Prefix increment // Prefix increment
__forceinline const FastListIterator<T>& operator++() { __forceinline const FastListIterator<T>& operator++()
{
m_index = m_fastlist->NextIndex(m_index); m_index = m_fastlist->NextIndex(m_index);
return *this; return *this;
} }
// Postfix increment // Postfix increment
__forceinline const FastListIterator<T> operator++(int) { __forceinline const FastListIterator<T> operator++(int)
{
FastListIterator<T> copy(*this); FastListIterator<T> copy(*this);
++(*this); ++(*this);
return copy; return copy;
} }
// Prefix decrement // Prefix decrement
__forceinline const FastListIterator<T>& operator--() { __forceinline const FastListIterator<T>& operator--()
{
m_index = m_fastlist->PrevIndex(m_index); m_index = m_fastlist->PrevIndex(m_index);
return *this; return *this;
} }
// Postfix decrement // Postfix decrement
__forceinline const FastListIterator<T> operator--(int) { __forceinline const FastListIterator<T> operator--(int)
{
FastListIterator<T> copy(*this); FastListIterator<T> copy(*this);
--(*this); --(*this);
return copy; return copy;
} }
__forceinline const T& operator*() const { __forceinline const T& operator*() const
{
return m_fastlist->Data(m_index); return m_fastlist->Data(m_index);
} }
__forceinline uint16 Index() const { __forceinline uint16 Index() const
{
return m_index; return m_index;
} }
}; };

View File

@ -28,7 +28,8 @@
#include "Renderers/SW/GSScanlineEnvironment.h" #include "Renderers/SW/GSScanlineEnvironment.h"
template<class KEY, class VALUE> class GSFunctionMap template <class KEY, class VALUE>
class GSFunctionMap
{ {
protected: protected:
struct ActivePtr struct ActivePtr
@ -53,7 +54,8 @@ public:
virtual ~GSFunctionMap() virtual ~GSFunctionMap()
{ {
for(auto &i : m_map_active) delete i.second; for (auto& i : m_map_active)
delete i.second;
} }
VALUE operator[](KEY key) VALUE operator[](KEY key)

View File

@ -26,14 +26,17 @@
#include "resource.h" #include "resource.h"
#endif #endif
void GSOsdManager::LoadFont() { void GSOsdManager::LoadFont()
{
FT_Error error = FT_New_Face(m_library, theApp.GetConfigS("osd_fontname").c_str(), 0, &m_face); FT_Error error = FT_New_Face(m_library, theApp.GetConfigS("osd_fontname").c_str(), 0, &m_face);
if (error) { if (error)
{
FT_Error error_load_res = 1; FT_Error error_load_res = 1;
if (theApp.LoadResource(IDR_FONT_ROBOTO, resource_data_buffer)) if (theApp.LoadResource(IDR_FONT_ROBOTO, resource_data_buffer))
error_load_res = FT_New_Memory_Face(m_library, (const FT_Byte*)resource_data_buffer.data(), resource_data_buffer.size(), 0, &m_face); error_load_res = FT_New_Memory_Face(m_library, (const FT_Byte*)resource_data_buffer.data(), resource_data_buffer.size(), 0, &m_face);
if (error_load_res) { if (error_load_res)
{
m_face = NULL; m_face = NULL;
fprintf(stderr, "Failed to init freetype face from external and internal resource\n"); fprintf(stderr, "Failed to init freetype face from external and internal resource\n");
if (error == FT_Err_Unknown_File_Format) if (error == FT_Err_Unknown_File_Format)
@ -45,11 +48,14 @@ void GSOsdManager::LoadFont() {
LoadSize(); LoadSize();
} }
void GSOsdManager::LoadSize() { void GSOsdManager::LoadSize()
if (!m_face) return; {
if (!m_face)
return;
FT_Error error = FT_Set_Pixel_Sizes(m_face, 0, m_size);; FT_Error error = FT_Set_Pixel_Sizes(m_face, 0, m_size);
if (error) { if (error)
{
fprintf(stderr, "Failed to init the face size\n"); fprintf(stderr, "Failed to init the face size\n");
return; return;
} }
@ -60,7 +66,8 @@ void GSOsdManager::LoadSize() {
m_atlas_h = m_size + 10; // another random guess m_atlas_h = m_size + 10; // another random guess
} }
GSOsdManager::GSOsdManager() : m_atlas_h(0) GSOsdManager::GSOsdManager()
: m_atlas_h(0)
, m_atlas_w(0) , m_atlas_w(0)
, m_max_width(0) , m_max_width(0)
, m_onscreen_messages(0) , m_onscreen_messages(0)
@ -79,7 +86,8 @@ GSOsdManager::GSOsdManager() : m_atlas_h(0)
m_color = r | (g << 8) | (b << 16) | (255 << 24); m_color = r | (g << 8) | (b << 16) | (255 << 24);
if (FT_Init_FreeType(&m_library)) { if (FT_Init_FreeType(&m_library))
{
m_face = NULL; m_face = NULL;
fprintf(stderr, "Failed to init the freetype library\n"); fprintf(stderr, "Failed to init the freetype library\n");
return; return;
@ -91,24 +99,30 @@ GSOsdManager::GSOsdManager() : m_atlas_h(0)
AddGlyph(' '); AddGlyph(' ');
} }
// Releases the FreeType library instance stored in m_library.
// NOTE(review): the constructor returns early when FT_Init_FreeType fails;
// whether m_library holds a well-defined value on that path is not visible
// here -- confirm before relying on this call being harmless.
GSOsdManager::~GSOsdManager()
{
	FT_Done_FreeType(m_library);
}
// Returns the glyph atlas dimensions packed as (m_atlas_w, m_atlas_h).
GSVector2i GSOsdManager::get_texture_font_size()
{
	GSVector2i atlas_extent(m_atlas_w, m_atlas_h);
	return atlas_extent;
}
void GSOsdManager::upload_texture_atlas(GSTexture* t) { void GSOsdManager::upload_texture_atlas(GSTexture* t)
if (!m_face) return; {
if (!m_face)
return;
if (m_char_info.size() > 96) // we only reserved space for this many glyphs if (m_char_info.size() > 96) // we only reserved space for this many glyphs
fprintf(stderr, "More than 96 glyphs needed for OSD"); fprintf(stderr, "More than 96 glyphs needed for OSD");
// This can be sped up a bit by only uploading new glyphs // This can be sped up a bit by only uploading new glyphs
int x = 0; int x = 0;
for(auto &pair : m_char_info) { for (auto& pair : m_char_info)
if(FT_Load_Char(m_face, pair.first, FT_LOAD_RENDER)) { {
if (FT_Load_Char(m_face, pair.first, FT_LOAD_RENDER))
{
fprintf(stderr, "failed to load char U%d\n", (int)pair.first); fprintf(stderr, "failed to load char U%d\n", (int)pair.first);
continue; continue;
} }
@ -127,7 +141,8 @@ void GSOsdManager::upload_texture_atlas(GSTexture* t) {
if (r.width()) if (r.width())
t->Update(r, m_face->glyph->bitmap.buffer, m_face->glyph->bitmap.pitch); t->Update(r, m_face->glyph->bitmap.buffer, m_face->glyph->bitmap.pitch);
if (r.width() > m_max_width) m_max_width = r.width(); if (r.width() > m_max_width)
m_max_width = r.width();
pair.second.tx = (float)x / m_atlas_w; pair.second.tx = (float)x / m_atlas_w;
pair.second.ty = (float)pair.second.bh / m_atlas_h; pair.second.ty = (float)pair.second.bh / m_atlas_h;
@ -142,35 +157,50 @@ void GSOsdManager::upload_texture_atlas(GSTexture* t) {
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) #if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
/* Minimal UTF-8 -> UTF-32 decoder for toolchains without a usable codecvt.
 * It is only expected to operate on compiled-in strings, so it does not
 * reject overlong encodings or out-of-range values. It does guarantee that
 * it never reads past the terminating NUL of the input, never writes more
 * than `size` elements, and always NUL-terminates the output.
 *
 * utf8  - NUL-terminated UTF-8 input.
 * utf32 - destination buffer holding at least `size` elements.
 * size  - capacity of `utf32` in elements, terminator included. Nothing is
 *         written when it is 0.
 *
 * Bytes that cannot start a sequence (stray continuation bytes, 0xF8..0xFF)
 * and sequences cut short by a non-continuation byte are skipped. */
void dumb_utf8_to_utf32(const char* utf8, char32_t* utf32, unsigned size)
{
	if (size == 0)
		return; // no room even for the terminator; also avoids unsigned underflow

	unsigned remaining = size - 1; // one slot is reserved for the NUL
	while (*utf8 && remaining)
	{
		const unsigned char lead = static_cast<unsigned char>(*utf8);
		unsigned length;
		char32_t codepoint;

		if ((lead & 0x80) == 0x00)
		{
			length = 1;
			codepoint = lead;
		}
		else if ((lead & 0xE0) == 0xC0)
		{
			length = 2;
			codepoint = lead & 0x1F;
		}
		else if ((lead & 0xF0) == 0xE0)
		{
			length = 3;
			codepoint = lead & 0x0F;
		}
		else if ((lead & 0xF8) == 0xF0) // 0xF8, not 0xF1: the low three bits are payload
		{
			length = 4;
			codepoint = lead & 0x07;
		}
		else
		{
			// Not a valid lead byte: consume it so the loop always advances.
			++utf8;
			continue;
		}

		// Fold in the continuation bytes, stopping at the first byte that is
		// not 10xxxxxx (this includes the terminating NUL).
		unsigned consumed = 1;
		for (; consumed < length; ++consumed)
		{
			const unsigned char next = static_cast<unsigned char>(utf8[consumed]);
			if ((next & 0xC0) != 0x80)
				break;
			codepoint = (codepoint << 6) | (next & 0x3F);
		}

		utf8 += consumed;
		if (consumed < length)
			continue; // truncated sequence: drop it and resume at the offending byte

		*utf32++ = codepoint;
		--remaining;
	}

	*utf32 = 0; // callers scan for the terminator, so always write it
}
#endif #endif
void GSOsdManager::AddGlyph(char32_t codepoint) { void GSOsdManager::AddGlyph(char32_t codepoint)
if (!m_face) return; {
if(m_char_info.count(codepoint) == 0) { if (!m_face)
return;
if (m_char_info.count(codepoint) == 0)
{
m_texture_dirty = true; m_texture_dirty = true;
m_char_info[codepoint]; // add it m_char_info[codepoint]; // add it
if(FT_HAS_KERNING(m_face)) { if (FT_HAS_KERNING(m_face))
{
FT_UInt new_glyph = FT_Get_Char_Index(m_face, codepoint); FT_UInt new_glyph = FT_Get_Char_Index(m_face, codepoint);
for(auto pair : m_char_info) { for (auto pair : m_char_info)
{
FT_Vector delta; FT_Vector delta;
FT_UInt glyph_index = FT_Get_Char_Index(m_face, pair.first); FT_UInt glyph_index = FT_Get_Char_Index(m_face, pair.first);
@ -181,14 +211,16 @@ void GSOsdManager::AddGlyph(char32_t codepoint) {
} }
} }
void GSOsdManager::Log(const char *utf8) { void GSOsdManager::Log(const char* utf8)
{
if (!m_log_enabled) if (!m_log_enabled)
return; return;
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) #if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
char32_t buffer[256]; char32_t buffer[256];
dumb_utf8_to_utf32(utf8, buffer, countof(buffer)); dumb_utf8_to_utf32(utf8, buffer, countof(buffer));
for(char32_t* c = buffer; *c; ++c) AddGlyph(*c); for (char32_t* c = buffer; *c; ++c)
AddGlyph(*c);
#else #else
#if _MSC_VER == 1900 #if _MSC_VER == 1900
std::wstring_convert<std::codecvt_utf8<unsigned int>, unsigned int> conv; std::wstring_convert<std::codecvt_utf8<unsigned int>, unsigned int> conv;
@ -196,24 +228,28 @@ void GSOsdManager::Log(const char *utf8) {
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv; std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
#endif #endif
std::u32string buffer = conv.from_bytes(utf8); std::u32string buffer = conv.from_bytes(utf8);
for(auto const &c : buffer) AddGlyph(c); for (auto const& c : buffer)
AddGlyph(c);
#endif #endif
m_onscreen_messages++; m_onscreen_messages++;
m_log.push_back(log_info{buffer, std::chrono::system_clock::time_point()}); m_log.push_back(log_info{buffer, std::chrono::system_clock::time_point()});
} }
void GSOsdManager::Monitor(const char *key, const char *value) { void GSOsdManager::Monitor(const char* key, const char* value)
{
if (!m_monitor_enabled) if (!m_monitor_enabled)
return; return;
if(value && *value) { if (value && *value)
{
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) #if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
char32_t buffer[256], vbuffer[256]; char32_t buffer[256], vbuffer[256];
dumb_utf8_to_utf32(key, buffer, countof(buffer)); dumb_utf8_to_utf32(key, buffer, countof(buffer));
dumb_utf8_to_utf32(value, vbuffer, countof(vbuffer)); dumb_utf8_to_utf32(value, vbuffer, countof(vbuffer));
for(char32_t* c = buffer; *c; ++c) AddGlyph(*c); for (char32_t* c = buffer; *c; ++c)
for(char32_t* c = vbuffer; *c; ++c) AddGlyph(*c); AddGlyph(*c);
for (char32_t* c = vbuffer; *c; ++c)
AddGlyph(*c);
#else #else
#if _MSC_VER == 1900 #if _MSC_VER == 1900
std::wstring_convert<std::codecvt_utf8<unsigned int>, unsigned int> conv; std::wstring_convert<std::codecvt_utf8<unsigned int>, unsigned int> conv;
@ -222,11 +258,15 @@ void GSOsdManager::Monitor(const char *key, const char *value) {
#endif #endif
std::u32string buffer = conv.from_bytes(key); std::u32string buffer = conv.from_bytes(key);
std::u32string vbuffer = conv.from_bytes(value); std::u32string vbuffer = conv.from_bytes(value);
for(auto const &c : buffer) AddGlyph(c); for (auto const& c : buffer)
for(auto const &c : vbuffer) AddGlyph(c); AddGlyph(c);
for (auto const& c : vbuffer)
AddGlyph(c);
#endif #endif
m_monitor[buffer] = vbuffer; m_monitor[buffer] = vbuffer;
} else { }
else
{
#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) #if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4)
char32_t buffer[256]; char32_t buffer[256];
dumb_utf8_to_utf32(key, buffer, countof(buffer)); dumb_utf8_to_utf32(key, buffer, countof(buffer));
@ -242,7 +282,8 @@ void GSOsdManager::Monitor(const char *key, const char *value) {
} }
} }
void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color) { void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color)
{
float x2 = x + g.bl * (2.0f / m_real_size.x); float x2 = x + g.bl * (2.0f / m_real_size.x);
float y2 = -y - g.bt * (2.0f / m_real_size.y); float y2 = -y - g.bt * (2.0f / m_real_size.y);
float w = g.bw * (2.0f / m_real_size.x); float w = g.bw * (2.0f / m_real_size.x);
@ -274,10 +315,13 @@ void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, fl
++dst; ++dst;
} }
void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color) { void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color)
{
char32_t p = 0; char32_t p = 0;
for(const auto & c : msg) { for (const auto& c : msg)
if(p) { {
if (p)
{
x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x); x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x);
} }
@ -293,36 +337,47 @@ void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, floa
} }
} }
size_t GSOsdManager::Size() { size_t GSOsdManager::Size()
{
size_t sum = 0; size_t sum = 0;
if(m_log_enabled) { if (m_log_enabled)
{
float offset = 0; float offset = 0;
for(auto it = m_log.begin(); it != m_log.end(); ++it) { for (auto it = m_log.begin(); it != m_log.end(); ++it)
{
float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y); float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y);
if(y + offset < -1) break; if (y + offset < -1)
break;
std::chrono::duration<float> elapsed; std::chrono::duration<float> elapsed;
if(it->OnScreen.time_since_epoch().count() == 0) { if (it->OnScreen.time_since_epoch().count() == 0)
{
elapsed = std::chrono::seconds(0); elapsed = std::chrono::seconds(0);
} else { }
else
{
elapsed = std::chrono::system_clock::now() - it->OnScreen; elapsed = std::chrono::system_clock::now() - it->OnScreen;
if(elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) { if (elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages)
{
continue; continue;
} }
} }
float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count(); float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count();
ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio; ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f :
ratio;
y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio; y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio;
sum += it->msg.size(); sum += it->msg.size();
} }
} }
if(m_monitor_enabled) { if (m_monitor_enabled)
for(const auto &pair : m_monitor) { {
for (const auto& pair : m_monitor)
{
sum += pair.first.size(); sum += pair.first.size();
sum += pair.second.size(); sum += pair.second.size();
} }
@ -331,12 +386,15 @@ size_t GSOsdManager::Size() {
return sum * 6; return sum * 6;
} }
float GSOsdManager::StringSize(const std::u32string msg) { float GSOsdManager::StringSize(const std::u32string msg)
{
char32_t p = 0; char32_t p = 0;
float x = 0.0; float x = 0.0;
for(auto c : msg) { for (auto c : msg)
if(p) { {
if (p)
{
x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x); x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x);
} }
@ -349,30 +407,36 @@ float GSOsdManager::StringSize(const std::u32string msg) {
return x; return x;
} }
size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) { size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count)
{
size_t drawn = 0; size_t drawn = 0;
float opacity = m_opacity * 0.01f; float opacity = m_opacity * 0.01f;
if(m_log_enabled) { if (m_log_enabled)
{
float offset = 0; float offset = 0;
for(auto it = m_log.begin(); it != m_log.end();) { for (auto it = m_log.begin(); it != m_log.end();)
{
float x = -1 + 8 * (2.0f / m_real_size.x); float x = -1 + 8 * (2.0f / m_real_size.x);
float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y); float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y);
if(y + offset < -1) break; if (y + offset < -1)
break;
if (it->OnScreen.time_since_epoch().count() == 0) if (it->OnScreen.time_since_epoch().count() == 0)
it->OnScreen = std::chrono::system_clock::now(); it->OnScreen = std::chrono::system_clock::now();
std::chrono::duration<float> elapsed = std::chrono::system_clock::now() - it->OnScreen; std::chrono::duration<float> elapsed = std::chrono::system_clock::now() - it->OnScreen;
if(elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) { if (elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages)
{
m_onscreen_messages--; m_onscreen_messages--;
it = m_log.erase(it); it = m_log.erase(it);
continue; continue;
} }
if(it->msg.size() * 6 > count - drawn) break; if (it->msg.size() * 6 > count - drawn)
break;
float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count(); float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count();
ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio; ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio;
@ -387,13 +451,15 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
} }
} }
if(m_monitor_enabled) { if (m_monitor_enabled)
{
// pair.first is the key and second is the value and color // pair.first is the key and second is the value and color
// Since the monitor is right justified, but we render from left to right // Since the monitor is right justified, but we render from left to right
// we need to find the longest string // we need to find the longest string
float first_max = 0.0, second_max = 0.0; float first_max = 0.0, second_max = 0.0;
for(const auto &pair : m_monitor) { for (const auto& pair : m_monitor)
{
float first_len = StringSize(pair.first); float first_len = StringSize(pair.first);
float second_len = StringSize(pair.second); float second_len = StringSize(pair.second);
@ -402,8 +468,10 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
} }
size_t line = 1; size_t line = 1;
for(const auto &pair : m_monitor) { for (const auto& pair : m_monitor)
if((pair.first.size() + pair.second.size()) * 6 > count - drawn) break; {
if ((pair.first.size() + pair.second.size()) * 6 > count - drawn)
break;
// Calculate where to start rendering from by taking the right most position 1.0 // Calculate where to start rendering from by taking the right most position 1.0
// and subtracting (going left) 8 scaled pixels for a margin, then subtracting // and subtracting (going left) 8 scaled pixels for a margin, then subtracting
@ -432,4 +500,3 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) {
return drawn; return drawn;
} }

View File

@ -28,8 +28,10 @@
#include <ft2build.h> #include <ft2build.h>
#include FT_FREETYPE_H #include FT_FREETYPE_H
class GSOsdManager { class GSOsdManager
struct glyph_info { {
struct glyph_info
{
int32 ax; // advance.x int32 ax; // advance.x
int32 ay; // advance.y int32 ay; // advance.y
@ -56,7 +58,8 @@ class GSOsdManager {
int32 m_max_width; int32 m_max_width;
int32 m_onscreen_messages; int32 m_onscreen_messages;
struct log_info { struct log_info
{
std::u32string msg; std::u32string msg;
std::chrono::system_clock::time_point OnScreen; std::chrono::system_clock::time_point OnScreen;
}; };
@ -77,7 +80,6 @@ class GSOsdManager {
int m_max_onscreen_messages; int m_max_onscreen_messages;
public: public:
GSOsdManager(); GSOsdManager();
~GSOsdManager(); ~GSOsdManager();
@ -97,6 +99,5 @@ class GSOsdManager {
size_t GeneratePrimitives(GSVertexPT1* dst, size_t count); size_t GeneratePrimitives(GSVertexPT1* dst, size_t count);
private: private:
std::vector<char> resource_data_buffer; std::vector<char> resource_data_buffer;
}; };

View File

@ -80,7 +80,8 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
void GSRenderer::ResetDevice() void GSRenderer::ResetDevice()
{ {
if(m_dev) m_dev->Reset(1, 1); if (m_dev)
m_dev->Reset(1, 1);
} }
bool GSRenderer::Merge(int field) bool GSRenderer::Merge(int field)
@ -187,9 +188,12 @@ bool GSRenderer::Merge(int field)
} }
else else
{ {
if(en[0]) tex[0] = GetOutput(0, y_offset[0]); if (en[0])
if(en[1]) tex[1] = GetOutput(1, y_offset[1]); tex[0] = GetOutput(0, y_offset[0]);
if(feedback_merge) tex[2] = GetFeedbackOutput(); if (en[1])
tex[1] = GetOutput(1, y_offset[1]);
if (feedback_merge)
tex[2] = GetFeedbackOutput();
} }
GSVector4 src[2]; GSVector4 src[2];
@ -198,7 +202,8 @@ bool GSRenderer::Merge(int field)
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
{ {
if(!en[i] || !tex[i]) continue; if (!en[i] || !tex[i])
continue;
GSVector4i r = fr[i]; GSVector4i r = fr[i];
GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();
@ -309,7 +314,8 @@ void GSRenderer::SetVSync(int vsync)
{ {
m_vsync = vsync; m_vsync = vsync;
if(m_dev) m_dev->SetVSync(m_vsync); if (m_dev)
m_dev->SetVSync(m_vsync);
} }
void GSRenderer::VSync(int field) void GSRenderer::VSync(int field)
@ -370,8 +376,7 @@ void GSRenderer::VSync(int field)
(int)m_perfmon.Get(GSPerfMon::Draw), (int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(), m_perfmon.CPU(),
m_perfmon.Get(GSPerfMon::Swizzle) / 1024, m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 m_perfmon.Get(GSPerfMon::Unswizzle) / 1024);
);
double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); double fillrate = m_perfmon.Get(GSPerfMon::Fillrate);
@ -617,7 +622,6 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
printf("GSdx: Dithering is now %s.\n", dither_msg[m_dithering]); printf("GSdx: Dithering is now %s.\n", dither_msg[m_dithering]);
return; return;
} }
} }
} }

View File

@ -35,15 +35,32 @@ protected:
bool m_sparse; bool m_sparse;
public: public:
struct GSMap {uint8* bits; int pitch;}; struct GSMap
{
uint8* bits;
int pitch;
};
enum {RenderTarget = 1, DepthStencil, Texture, Offscreen, Backbuffer, SparseRenderTarget, SparseDepthStencil}; enum
{
RenderTarget = 1,
DepthStencil,
Texture,
Offscreen,
Backbuffer,
SparseRenderTarget,
SparseDepthStencil
};
public: public:
GSTexture(); GSTexture();
virtual ~GSTexture() {} virtual ~GSTexture() {}
virtual operator bool() {ASSERT(0); return false;} virtual operator bool()
{
ASSERT(0);
return false;
}
virtual bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) = 0; virtual bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) = 0;
virtual bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) = 0; virtual bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) = 0;
@ -62,7 +79,7 @@ public:
int GetType() const { return m_type; } int GetType() const { return m_type; }
int GetFormat() const { return m_format; } int GetFormat() const { return m_format; }
virtual void CommitPages(const GSVector2i& region, bool commit) {}; virtual void CommitPages(const GSVector2i& region, bool commit) {}
void CommitRegion(const GSVector2i& region); void CommitRegion(const GSVector2i& region);
void Commit(); void Commit();
void Uncommit(); void Uncommit();

View File

@ -50,11 +50,22 @@ struct alignas(32) GSVertex
GSVertex() = default; // Warning object is potentially used in hot path GSVertex() = default; // Warning object is potentially used in hot path
#if _M_SSE >= 0x500 #if _M_SSE >= 0x500
GSVertex(const GSVertex& v) {mx = v.mx;} GSVertex(const GSVertex& v)
{
mx = v.mx;
}
void operator=(const GSVertex& v) { mx = v.mx; } void operator=(const GSVertex& v) { mx = v.mx; }
#else #else
GSVertex(const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];} GSVertex(const GSVertex& v)
void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];} {
m[0] = v.m[0];
m[1] = v.m[1];
}
void operator=(const GSVertex& v)
{
m[0] = v.m[0];
m[1] = v.m[1];
}
#endif #endif
}; };

View File

@ -21,7 +21,8 @@
#pragma once #pragma once
template <class Vertex> class GSVertexList template <class Vertex>
class GSVertexList
{ {
void* m_base; void* m_base;
Vertex* m_v[3]; Vertex* m_v[3];

View File

@ -68,7 +68,8 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
// Potential float overflow detected. Better uses the slower division instead // Potential float overflow detected. Better uses the slower division instead
// Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number // Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number
// that feel big enough. // that feel big enough.
if (!fst && !m_accurate_stq && m_min.t.z > 1e30) { if (!fst && !m_accurate_stq && m_min.t.z > 1e30)
{
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z); fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
m_accurate_stq = true; m_accurate_stq = true;
(this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count); (this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count);
@ -79,7 +80,8 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
m_alpha.valid = false; m_alpha.valid = false;
// I'm not sure of the cost. In doubt let's do it only when depth is enabled // I'm not sure of the cost. In doubt let's do it only when depth is enabled
if(m_state->m_context->TEST.ZTE == 1 && m_state->m_context->TEST.ZTST > ZTST_ALWAYS) { if (m_state->m_context->TEST.ZTE == 1 && m_state->m_context->TEST.ZTST > ZTST_ALWAYS)
{
CorrectDepthTrace(vertex, v_count); CorrectDepthTrace(vertex, v_count);
} }
@ -104,7 +106,12 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K); GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K);
if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;} if (m_lod.x > m_lod.y)
{
float tmp = m_lod.x;
m_lod.x = m_lod.y;
m_lod.y = tmp;
}
} }
else else
{ {
@ -518,21 +525,29 @@ void GSVertexTrace::CorrectDepthTrace(const void* vertex, int count)
uint32 z = v[0].XYZ.Z; uint32 z = v[0].XYZ.Z;
// ought to check only 1/2 for sprite // ought to check only 1/2 for sprite
if (z & 1) { if (z & 1)
{
// Check that first bit is always 1 // Check that first bit is always 1
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++)
{
z &= v[i].XYZ.Z; z &= v[i].XYZ.Z;
} }
} else { }
else
{
// Check that first bit is always 0 // Check that first bit is always 0
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++)
{
z |= v[i].XYZ.Z; z |= v[i].XYZ.Z;
} }
} }
if (z == v[0].XYZ.Z) { if (z == v[0].XYZ.Z)
{
m_eq.z = 1; m_eq.z = 1;
} else { }
else
{
m_eq.z = 0; m_eq.z = 0;
} }
} }

View File

@ -34,8 +34,16 @@ class alignas(32) GSVertexTrace : public GSAlignedClass<32>
BiFiltering m_force_filter; BiFiltering m_force_filter;
public: public:
struct Vertex {GSVector4i c; GSVector4 p, t;}; struct Vertex
struct VertexAlpha {int min, max; bool valid;}; {
GSVector4i c;
GSVector4 p, t;
};
struct VertexAlpha
{
int min, max;
bool valid;
};
bool m_accurate_stq; bool m_accurate_stq;
protected: protected:

View File

@ -160,8 +160,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
const HRESULT result = D3D11CreateDevice( const HRESULT result = D3D11CreateDevice(
adapter, driver_type, nullptr, flags, adapter, driver_type, nullptr, flags,
supported_levels.data(), supported_levels.size(), supported_levels.data(), supported_levels.size(),
D3D11_SDK_VERSION, &m_dev, &level, &m_ctx D3D11_SDK_VERSION, &m_dev, &level, &m_ctx);
);
if (FAILED(result)) if (FAILED(result))
{ {
@ -189,8 +188,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
const HRESULT result = m_factory->CreateSwapChainForHwnd( const HRESULT result = m_factory->CreateSwapChainForHwnd(
m_dev, reinterpret_cast<HWND>(m_wnd->GetHandle()), m_dev, reinterpret_cast<HWND>(m_wnd->GetHandle()),
&swapchain_description, nullptr, nullptr, &m_swapchain &swapchain_description, nullptr, nullptr, &m_swapchain);
);
if (FAILED(result)) if (FAILED(result))
{ {
@ -208,7 +206,8 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
else else
m_d3d_texsize = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; m_d3d_texsize = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
{ // HACK: check nVIDIA {
// HACK: check nVIDIA
// Note: It can cause issues on several games such as SOTC, Fatal Frame, plus it adds border offset. // Note: It can cause issues on several games such as SOTC, Fatal Frame, plus it adds border offset.
bool disable_safe_features = theApp.GetConfigB("UserHacks") && theApp.GetConfigB("UserHacks_Disable_Safe_Features"); bool disable_safe_features = theApp.GetConfigB("UserHacks") && theApp.GetConfigB("UserHacks_Disable_Safe_Features");
m_hack_topleft_offset = (m_upscale_multiplier != 1 && nvidia_vendor && !disable_safe_features) ? -0.01f : 0.0f; m_hack_topleft_offset = (m_upscale_multiplier != 1 && nvidia_vendor && !disable_safe_features) ? -0.01f : 0.0f;
@ -435,8 +434,7 @@ bool GSDevice11::Create(const std::shared_ptr<GSWnd> &wnd)
GSVector2i tex_font = m_osd.get_texture_font_size(); GSVector2i tex_font = m_osd.get_texture_font_size();
m_font = std::unique_ptr<GSTexture>( m_font = std::unique_ptr<GSTexture>(
CreateSurface(GSTexture::Texture, tex_font.x, tex_font.y, DXGI_FORMAT_R8_UNORM) CreateSurface(GSTexture::Texture, tex_font.x, tex_font.y, DXGI_FORMAT_R8_UNORM));
);
return true; return true;
} }
@ -553,13 +551,15 @@ void GSDevice11::DrawIndexedPrimitive(int offset, int count)
// Clears the render target view obtained from `t` to the colour held in `c.v`.
// A null texture is silently ignored.
void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
{
	if (t)
	{
		m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v);
	}
}
void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c) void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
{ {
if (!t) return; if (!t)
return;
GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255); GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255);
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v); m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v);
@ -567,13 +567,15 @@ void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
// Clears only the depth portion of the depth-stencil view obtained from `t`,
// writing 0.0f as the depth value. A null texture is silently ignored.
void GSDevice11::ClearDepth(GSTexture* t)
{
	if (t)
	{
		m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_DEPTH, 0.0f, 0);
	}
}
// Clears only the stencil portion of the depth-stencil view obtained from `t`,
// writing `c` as the stencil value. A null texture is silently ignored.
void GSDevice11::ClearStencil(GSTexture* t, uint8 c)
{
	if (t)
	{
		m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_STENCIL, 0, c);
	}
}
@ -758,8 +760,8 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
return; return;
} }
bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24] || bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24]
ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]); || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]);
BeginScene(); BeginScene();
@ -837,7 +839,8 @@ void GSDevice11::RenderOsd(GSTexture* dt)
OMSetBlendState(m_merge.bs, 0); OMSetBlendState(m_merge.bs, 0);
OMSetRenderTargets(dt, NULL); OMSetRenderTargets(dt, NULL);
if(m_osd.m_texture_dirty) { if (m_osd.m_texture_dirty)
{
m_osd.upload_texture_atlas(m_font.get()); m_osd.upload_texture_atlas(m_font.get());
} }
@ -911,7 +914,8 @@ void GSDevice11::InitExternalFX()
{ {
if (!ExShader_Compiled) if (!ExShader_Compiled)
{ {
try { try
{
std::string config_name(theApp.GetConfigS("shaderfx_conf")); std::string config_name(theApp.GetConfigS("shaderfx_conf"));
std::ifstream fconfig(config_name); std::ifstream fconfig(config_name);
std::stringstream shader; std::stringstream shader;
@ -935,7 +939,8 @@ void GSDevice11::InitExternalFX()
fprintf(stderr, "GSdx: External shader '%s' not loaded and will be disabled!\n", shader_name.c_str()); fprintf(stderr, "GSdx: External shader '%s' not loaded and will be disabled!\n", shader_name.c_str());
} }
} }
catch (GSDXRecoverableError) { catch (GSDXRecoverableError)
{
printf("GSdx: failed to compile external post-processing shader. \n"); printf("GSdx: failed to compile external post-processing shader. \n");
} }
ExShader_Compiled = true; ExShader_Compiled = true;
@ -968,13 +973,15 @@ void GSDevice11::InitFXAA()
{ {
if (!FXAA_Compiled) if (!FXAA_Compiled)
{ {
try { try
{
std::vector<char> shader; std::vector<char> shader;
theApp.LoadResource(IDR_FXAA_FX, shader); theApp.LoadResource(IDR_FXAA_FX, shader);
ShaderMacro sm(m_shader.model); ShaderMacro sm(m_shader.model);
CreateShader(shader, "fxaa.fx", nullptr, "ps_main", sm.GetPtr(), &m_fxaa.ps); CreateShader(shader, "fxaa.fx", nullptr, "ps_main", sm.GetPtr(), &m_fxaa.ps);
} }
catch (GSDXRecoverableError) { catch (GSDXRecoverableError)
{
printf("GSdx: failed to compile fxaa shader.\n"); printf("GSdx: failed to compile fxaa shader.\n");
} }
FXAA_Compiled = true; FXAA_Compiled = true;
@ -1102,7 +1109,8 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); hr = m_dev->CreateBuffer(&bd, NULL, &m_vb);
if(FAILED(hr)) return false; if (FAILED(hr))
return false;
} }
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
@ -1178,7 +1186,8 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
hr = m_dev->CreateBuffer(&bd, NULL, &m_ib); hr = m_dev->CreateBuffer(&bd, NULL, &m_ib);
if(FAILED(hr)) return; if (FAILED(hr))
return;
} }
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
@ -1283,7 +1292,8 @@ void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
{ {
ID3D11ShaderResourceView* srv = NULL; ID3D11ShaderResourceView* srv = NULL;
if(sr) srv = *(GSTexture11*)sr; if (sr)
srv = *(GSTexture11*)sr;
PSSetShaderResourceView(i, srv, sr); PSSetShaderResourceView(i, srv, sr);
} }
@ -1494,8 +1504,7 @@ void GSDevice11::CompileShader(const std::vector<char>& source, const char* fn,
const HRESULT hr = D3DCompile( const HRESULT hr = D3DCompile(
source.data(), source.size(), fn, macro, source.data(), source.size(), fn, macro,
include, entry, shader_model.c_str(), include, entry, shader_model.c_str(),
flags, 0, shader, &error flags, 0, shader, &error);
);
if (error) if (error)
fprintf(stderr, "%s\n", (const char*)error->GetBufferPointer()); fprintf(stderr, "%s\n", (const char*)error->GetBufferPointer());

View File

@ -89,8 +89,14 @@ public:
operator uint32() const { return key; } operator uint32() const { return key; }
VSSelector() : key(0) {} VSSelector()
VSSelector(uint32 k) : key(k) {} : key(0)
{
}
VSSelector(uint32 k)
: key(k)
{
}
}; };
struct alignas(32) PSConstantBuffer struct alignas(32) PSConstantBuffer
@ -192,8 +198,14 @@ public:
operator uint32() { return key; } operator uint32() { return key; }
GSSelector() : key(0) {} GSSelector()
GSSelector(uint32 k) : key(k) {} : key(0)
{
}
GSSelector(uint32 k)
: key(k)
{
}
}; };
struct PSSelector struct PSSelector
@ -260,7 +272,10 @@ public:
operator uint64() { return key; } operator uint64() { return key; }
PSSelector() : key(0) {} PSSelector()
: key(0)
{
}
}; };
struct PSSamplerSelector struct PSSamplerSelector
@ -279,7 +294,10 @@ public:
operator uint32() { return key & 0x7; } operator uint32() { return key & 0x7; }
PSSamplerSelector() : key(0) {} PSSamplerSelector()
: key(0)
{
}
}; };
struct OMDepthStencilSelector struct OMDepthStencilSelector
@ -300,7 +318,10 @@ public:
operator uint32() { return key & 0x3f; } operator uint32() { return key & 0x3f; }
OMDepthStencilSelector() : key(0) {} OMDepthStencilSelector()
: key(0)
{
}
}; };
struct OMBlendSelector struct OMBlendSelector
@ -331,7 +352,10 @@ public:
operator uint32() { return key & 0x1fff; } operator uint32() { return key & 0x1fff; }
OMBlendSelector() : key(0) {} OMBlendSelector()
: key(0)
{
}
}; };
#pragma pack(pop) #pragma pack(pop)
@ -341,13 +365,21 @@ public:
struct mcstr struct mcstr
{ {
const char *name, *def; const char *name, *def;
mcstr(const char* n, const char* d) : name(n), def(d) {} mcstr(const char* n, const char* d)
: name(n)
, def(d)
{
}
}; };
struct mstring struct mstring
{ {
std::string name, def; std::string name, def;
mstring(const char* n, std::string d) : name(n), def(d) {} mstring(const char* n, std::string d)
: name(n)
, def(d)
{
}
}; };
std::vector<mstring> mlist; std::vector<mstring> mlist;
@ -494,7 +526,12 @@ private:
std::unique_ptr<GSTexture> m_font; std::unique_ptr<GSTexture> m_font;
protected: protected:
struct {D3D_FEATURE_LEVEL level; std::string model, vs, gs, ps, cs;} m_shader; struct
{
D3D_FEATURE_LEVEL level;
std::string model, vs, gs, ps, cs;
} m_shader;
public: public:
GSDevice11(); GSDevice11();
virtual ~GSDevice11() {} virtual ~GSDevice11() {}
@ -568,4 +605,3 @@ public:
void CompileShader(const std::vector<char>& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3DBlob** shader, std::string shader_model); void CompileShader(const std::vector<char>& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3DBlob** shader, std::string shader_model);
}; };

View File

@ -102,7 +102,8 @@ void GSRendererDX11::SetupIA(const float& sx, const float& sy)
for (unsigned int i = 0; i < m_vertex.next; i++) for (unsigned int i = 0; i < m_vertex.next; i++)
{ {
if (PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF; if (PRIM->TME && PRIM->FST)
d[i].UV &= 0x3FEF3FEF;
} }
} }
@ -401,7 +402,6 @@ void GSRendererDX11::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:
// fprintf(stderr, "%d: Green channel (wrong mask) (fbmask %x)\n", s_n, m_context->FRAME.FBMSK >> 24); // fprintf(stderr, "%d: Green channel (wrong mask) (fbmask %x)\n", s_n, m_context->FRAME.FBMSK >> 24);
m_ps_sel.channel = ChannelFetch_GREEN; m_ps_sel.channel = ChannelFetch_GREEN;
} }
} }
else if (green) else if (green)
{ {
@ -491,7 +491,8 @@ void GSRendererDX11::EmulateBlending()
case ACC_BLEND_BASIC_D3D11: case ACC_BLEND_BASIC_D3D11:
sw_blending |= accumulation_blend || blend_non_recursive; sw_blending |= accumulation_blend || blend_non_recursive;
[[fallthrough]]; [[fallthrough]];
default: break; default:
break;
} }
// Color clip // Color clip
@ -532,7 +533,8 @@ void GSRendererDX11::EmulateBlending()
{ {
m_om_bsel.accu_blend = 1; m_om_bsel.accu_blend = 1;
if (ALPHA.A == 2) { if (ALPHA.A == 2)
{
// The blend unit does a reverse subtraction so it means // The blend unit does a reverse subtraction so it means
// the shader must output a positive value. // the shader must output a positive value.
// Replace 0 - Cs by Cs - 0 // Replace 0 - Cs by Cs - 0
@ -619,7 +621,6 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex)
GSVector4 half_offset = RealignTargetTextureCoordinate(tex); GSVector4 half_offset = RealignTargetTextureCoordinate(tex);
vs_cb.Texture_Scale_Offset.z = half_offset.x; vs_cb.Texture_Scale_Offset.z = half_offset.x;
vs_cb.Texture_Scale_Offset.w = half_offset.y; vs_cb.Texture_Scale_Offset.w = half_offset.y;
} }
else if (tex->m_target) else if (tex->m_target)
{ {
@ -684,7 +685,6 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex)
// Note 4 bits indexes are converted to 8 bits // Note 4 bits indexes are converted to 8 bits
m_ps_sel.fmt = 3 << 2; m_ps_sel.fmt = 3 << 2;
} }
else else
{ {

View File

@ -27,7 +27,8 @@
class GSRendererDX11 final : public GSRendererHW class GSRendererDX11 final : public GSRendererHW
{ {
enum ACC_BLEND_D3D11 { enum ACC_BLEND_D3D11
{
ACC_BLEND_NONE_D3D11 = 0, ACC_BLEND_NONE_D3D11 = 0,
ACC_BLEND_BASIC_D3D11 = 1, ACC_BLEND_BASIC_D3D11 = 1,
ACC_BLEND_MEDIUM_D3D11 = 2, ACC_BLEND_MEDIUM_D3D11 = 2,

View File

@ -36,10 +36,14 @@ GSTexture11::GSTexture11(ID3D11Texture2D* texture)
m_size.x = (int)m_desc.Width; m_size.x = (int)m_desc.Width;
m_size.y = (int)m_desc.Height; m_size.y = (int)m_desc.Height;
if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) m_type = RenderTarget; if (m_desc.BindFlags & D3D11_BIND_RENDER_TARGET)
else if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) m_type = DepthStencil; m_type = RenderTarget;
else if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) m_type = Texture; else if (m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL)
else if(m_desc.Usage == D3D11_USAGE_STAGING) m_type = Offscreen; m_type = DepthStencil;
else if (m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE)
m_type = Texture;
else if (m_desc.Usage == D3D11_USAGE_STAGING)
m_type = Offscreen;
m_format = (int)m_desc.Format; m_format = (int)m_desc.Format;

View File

@ -121,13 +121,15 @@ void GSTextureCache11::Read(Source* t, const GSVector4i& r)
const GIFRegTEX0& TEX0 = t->m_TEX0; const GIFRegTEX0& TEX0 = t->m_TEX0;
if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) { if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height()))
{
m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r); m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r);
GSTexture::GSMap m; GSTexture::GSMap m;
GSVector4i r_offscreen(0, 0, r.width(), r.height()); GSVector4i r_offscreen(0, 0, r.width(), r.height());
if (offscreen->Map(m, &r_offscreen)) { if (offscreen->Map(m, &r_offscreen))
{
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -38,7 +38,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateBuffer(&bd, NULL, &m_vs_cb); hr = m_dev->CreateBuffer(&bd, NULL, &m_vs_cb);
if(FAILED(hr)) return false; if (FAILED(hr))
return false;
memset(&bd, 0, sizeof(bd)); memset(&bd, 0, sizeof(bd));
@ -48,7 +49,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateBuffer(&bd, NULL, &m_gs_cb); hr = m_dev->CreateBuffer(&bd, NULL, &m_gs_cb);
if (FAILED(hr)) return false; if (FAILED(hr))
return false;
memset(&bd, 0, sizeof(bd)); memset(&bd, 0, sizeof(bd));
@ -58,7 +60,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateBuffer(&bd, NULL, &m_ps_cb); hr = m_dev->CreateBuffer(&bd, NULL, &m_ps_cb);
if(FAILED(hr)) return false; if (FAILED(hr))
return false;
D3D11_SAMPLER_DESC sd; D3D11_SAMPLER_DESC sd;
@ -75,7 +78,8 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateSamplerState(&sd, &m_palette_ss); hr = m_dev->CreateSamplerState(&sd, &m_palette_ss);
if(FAILED(hr)) return false; if (FAILED(hr))
return false;
// create layout // create layout

View File

@ -44,7 +44,8 @@ GSRendererHW::GSRendererHW(GSTextureCache* tc)
m_conservative_framebuffer = theApp.GetConfigB("conservative_framebuffer"); m_conservative_framebuffer = theApp.GetConfigB("conservative_framebuffer");
m_accurate_date = theApp.GetConfigB("accurate_date"); m_accurate_date = theApp.GetConfigB("accurate_date");
if (theApp.GetConfigB("UserHacks")) { if (theApp.GetConfigB("UserHacks"))
{
m_userhacks_enabled_gs_mem_clear = !theApp.GetConfigB("UserHacks_Disable_Safe_Features"); m_userhacks_enabled_gs_mem_clear = !theApp.GetConfigB("UserHacks_Disable_Safe_Features");
m_userHacks_enabled_unscale_ptln = !theApp.GetConfigB("UserHacks_Disable_Safe_Features"); m_userHacks_enabled_unscale_ptln = !theApp.GetConfigB("UserHacks_Disable_Safe_Features");
m_userhacks_align_sprite_X = theApp.GetConfigB("UserHacks_align_sprite_X"); m_userhacks_align_sprite_X = theApp.GetConfigB("UserHacks_align_sprite_X");
@ -55,7 +56,9 @@ GSRendererHW::GSRendererHW(GSTextureCache* tc)
m_userhacks_tcoffset_x = theApp.GetConfigI("UserHacks_TCOffsetX") / -1000.0f; m_userhacks_tcoffset_x = theApp.GetConfigI("UserHacks_TCOffsetX") / -1000.0f;
m_userhacks_tcoffset_y = theApp.GetConfigI("UserHacks_TCOffsetY") / -1000.0f; m_userhacks_tcoffset_y = theApp.GetConfigI("UserHacks_TCOffsetY") / -1000.0f;
m_userhacks_tcoffset = m_userhacks_tcoffset_x < 0.0f || m_userhacks_tcoffset_y < 0.0f; m_userhacks_tcoffset = m_userhacks_tcoffset_x < 0.0f || m_userhacks_tcoffset_y < 0.0f;
} else { }
else
{
m_userhacks_enabled_gs_mem_clear = true; m_userhacks_enabled_gs_mem_clear = true;
m_userHacks_enabled_unscale_ptln = true; m_userHacks_enabled_unscale_ptln = true;
m_userhacks_align_sprite_X = false; m_userhacks_align_sprite_X = false;
@ -65,12 +68,14 @@ GSRendererHW::GSRendererHW(GSTextureCache* tc)
m_userHacks_HPO = 0; m_userHacks_HPO = 0;
} }
if (!m_upscale_multiplier) { //Custom Resolution if (!m_upscale_multiplier) // Custom Resolution
{
m_custom_width = m_width = theApp.GetConfigI("resx"); m_custom_width = m_width = theApp.GetConfigI("resx");
m_custom_height = m_height = theApp.GetConfigI("resy"); m_custom_height = m_height = theApp.GetConfigI("resy");
} }
if (m_upscale_multiplier == 1) { // hacks are only needed for upscaling issues. if (m_upscale_multiplier == 1) // hacks are only needed for upscaling issues.
{
m_userhacks_round_sprite_offset = 0; m_userhacks_round_sprite_offset = 0;
m_userhacks_align_sprite_X = false; m_userhacks_align_sprite_X = false;
m_userHacks_merge_sprite = false; m_userHacks_merge_sprite = false;
@ -326,7 +331,8 @@ GSTexture* GSRendererHW::GetOutput(int i, int& y_offset)
t = rt->m_texture; t = rt->m_texture;
int delta = TEX0.TBP0 - rt->m_TEX0.TBP0; int delta = TEX0.TBP0 - rt->m_TEX0.TBP0;
if (delta > 0 && DISPFB.FBW != 0) { if (delta > 0 && DISPFB.FBW != 0)
{
int pages = delta >> 5u; int pages = delta >> 5u;
int y_pages = pages / DISPFB.FBW; int y_pages = pages / DISPFB.FBW;
y_offset = y_pages * GSLocalMemory::m_psm[DISPFB.PSM].pgs.y; y_offset = y_pages * GSLocalMemory::m_psm[DISPFB.PSM].pgs.y;
@ -398,7 +404,8 @@ void GSRendererHW::Lines2Sprites()
v0.XYZ.Z = v1.XYZ.Z; v0.XYZ.Z = v1.XYZ.Z;
v0.FOG = v1.FOG; v0.FOG = v1.FOG;
if (PRIM->TME && !PRIM->FST) { if (PRIM->TME && !PRIM->FST)
{
GSVector4 st0 = GSVector4::loadl(&v0.ST.u64); GSVector4 st0 = GSVector4::loadl(&v0.ST.u64);
GSVector4 st1 = GSVector4::loadl(&v1.ST.u64); GSVector4 st1 = GSVector4::loadl(&v1.ST.u64);
GSVector4 Q = GSVector4(v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q); GSVector4 Q = GSVector4(v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q);
@ -508,7 +515,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
read_ba = (tex_pos > 112 && tex_pos < 144); read_ba = (tex_pos > 112 && tex_pos < 144);
bool half_bottom = false; bool half_bottom = false;
switch (m_userhacks_ts_half_bottom) { switch (m_userhacks_ts_half_bottom)
{
case 0: case 0:
// Force Disabled. // Force Disabled.
// Force Disabled will help games such as Xenosaga. // Force Disabled will help games such as Xenosaga.
@ -541,10 +549,12 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
break; break;
} }
if (PRIM->FST) { if (PRIM->FST)
{
GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U); GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U);
for(size_t i = 0; i < count; i += 2) { for (size_t i = 0; i < count; i += 2)
{
if (write_ba) if (write_ba)
v[i].XYZ.X -= 128u; v[i].XYZ.X -= 128u;
else else
@ -555,7 +565,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
else else
v[i+1].U += 128u; v[i+1].U += 128u;
if (!half_bottom){ if (!half_bottom)
{
// Height is too big (2x). // Height is too big (2x).
int tex_offset = v[i].V & 0xF; int tex_offset = v[i].V & 0xF;
GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset);
@ -569,11 +580,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
v[i + 1].V = (uint16)tmp.w; v[i + 1].V = (uint16)tmp.w;
} }
} }
} else { }
else
{
const float offset_8pix = 8.0f / tw; const float offset_8pix = 8.0f / tw;
GL_INS("First vertex is P: %d => %d T: %f => %f (offset %f)", v[0].XYZ.X, v[1].XYZ.X, v[0].ST.S, v[1].ST.S, offset_8pix); GL_INS("First vertex is P: %d => %d T: %f => %f (offset %f)", v[0].XYZ.X, v[1].XYZ.X, v[0].ST.S, v[1].ST.S, offset_8pix);
for(size_t i = 0; i < count; i += 2) { for (size_t i = 0; i < count; i += 2)
{
if (write_ba) if (write_ba)
v[i].XYZ.X -= 128u; v[i].XYZ.X -= 128u;
else else
@ -584,7 +598,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
else else
v[i+1].ST.S += offset_8pix; v[i+1].ST.S += offset_8pix;
if (!half_bottom) { if (!half_bottom)
{
// Height is too big (2x). // Height is too big (2x).
GSVector4i offset(o.OFY, o.OFY); GSVector4i offset(o.OFY, o.OFY);
@ -606,7 +621,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
else else
m_vt.m_max.p.x += 8.0f; m_vt.m_max.p.x += 8.0f;
if (!half_bottom) { if (!half_bottom)
{
float delta_Y = m_vt.m_max.p.y - m_vt.m_min.p.y; float delta_Y = m_vt.m_max.p.y - m_vt.m_min.p.y;
m_vt.m_max.p.y -= delta_Y / 2.0f; m_vt.m_max.p.y -= delta_Y / 2.0f;
} }
@ -616,7 +632,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
else else
m_vt.m_max.t.x += 8.0f; m_vt.m_max.t.x += 8.0f;
if (!half_bottom) { if (!half_bottom)
{
float delta_T = m_vt.m_max.t.y - m_vt.m_min.t.y; float delta_T = m_vt.m_max.t.y - m_vt.m_min.t.y;
m_vt.m_max.t.y -= delta_T / 2.0f; m_vt.m_max.t.y -= delta_T / 2.0f;
} }
@ -624,7 +641,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex) GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex)
{ {
if (m_userHacks_HPO <= 1 || GetUpscaleMultiplier() == 1) return GSVector4(0.0f); if (m_userHacks_HPO <= 1 || GetUpscaleMultiplier() == 1)
return GSVector4(0.0f);
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
const GSVector2& scale = tex->m_texture->GetScale(); const GSVector2& scale = tex->m_texture->GetScale();
@ -635,27 +653,41 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
// FIXME Let's start with something wrong same mess on X and Y // FIXME Let's start with something wrong same mess on X and Y
// FIXME Maybe it will be enough to check linear // FIXME Maybe it will be enough to check linear
if (PRIM->FST) { if (PRIM->FST)
{
if (m_userHacks_HPO == 3) { if (m_userHacks_HPO == 3)
if (!linear && t_position == 8) { {
half_offset.x = 8; if (!linear && t_position == 8)
half_offset.y = 8; {
} else if (linear && t_position == 16) {
half_offset.x = 16;
half_offset.y = 16;
} else if (m_vt.m_min.p.x == -0.5f) {
half_offset.x = 8; half_offset.x = 8;
half_offset.y = 8; half_offset.y = 8;
} }
} else { else if (linear && t_position == 16)
if (!linear && t_position == 8) { {
half_offset.x = 16;
half_offset.y = 16;
}
else if (m_vt.m_min.p.x == -0.5f)
{
half_offset.x = 8;
half_offset.y = 8;
}
}
else
{
if (!linear && t_position == 8)
{
half_offset.x = 8 - 8 / scale.x; half_offset.x = 8 - 8 / scale.x;
half_offset.y = 8 - 8 / scale.y; half_offset.y = 8 - 8 / scale.y;
} else if (linear && t_position == 16) { }
else if (linear && t_position == 16)
{
half_offset.x = 16 - 16 / scale.x; half_offset.x = 16 - 16 / scale.x;
half_offset.y = 16 - 16 / scale.y; half_offset.y = 16 - 16 / scale.y;
} else if (m_vt.m_min.p.x == -0.5f) { }
else if (m_vt.m_min.p.x == -0.5f)
{
half_offset.x = 8; half_offset.x = 8;
half_offset.y = 8; half_offset.y = 8;
} }
@ -663,8 +695,9 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
GL_INS("offset detected %f,%f t_pos %d (linear %d, scale %f)", GL_INS("offset detected %f,%f t_pos %d (linear %d, scale %f)",
half_offset.x, half_offset.y, t_position, linear, scale.x); half_offset.x, half_offset.y, t_position, linear, scale.x);
}
} else if (m_vt.m_eq.q) { else if (m_vt.m_eq.q)
{
float tw = (float)(1 << m_context->TEX0.TW); float tw = (float)(1 << m_context->TEX0.TW);
float th = (float)(1 << m_context->TEX0.TH); float th = (float)(1 << m_context->TEX0.TH);
float q = v[0].RGBAQ.Q; float q = v[0].RGBAQ.Q;
@ -675,7 +708,6 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
GL_INS("ST offset detected %f,%f (linear %d, scale %f)", GL_INS("ST offset detected %f,%f (linear %d, scale %f)",
half_offset.x, half_offset.y, linear, scale.x); half_offset.x, half_offset.y, linear, scale.x);
} }
return half_offset; return half_offset;
@ -692,8 +724,10 @@ GSVector4i GSRendererHW::ComputeBoundingBox(const GSVector2& rtscale, const GSVe
void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) void GSRendererHW::MergeSprite(GSTextureCache::Source* tex)
{ {
// Upscaling hack to avoid various line/grid issues // Upscaling hack to avoid various line/grid issues
if (m_userHacks_merge_sprite && tex && tex->m_target && (m_vt.m_primclass == GS_SPRITE_CLASS)) { if (m_userHacks_merge_sprite && tex && tex->m_target && (m_vt.m_primclass == GS_SPRITE_CLASS))
if (PRIM->FST && GSLocalMemory::m_psm[tex->m_TEX0.PSM].fmt < 2 && ((m_vt.m_eq.value & 0xCFFFF) == 0xCFFFF)) { {
if (PRIM->FST && GSLocalMemory::m_psm[tex->m_TEX0.PSM].fmt < 2 && ((m_vt.m_eq.value & 0xCFFFF) == 0xCFFFF))
{
// Ideally the hack ought to be enabled in a true paving mode only. I don't know how to do it accurately // Ideally the hack ought to be enabled in a true paving mode only. I don't know how to do it accurately
// neither in a fast way. So instead let's just take the hypothesis that all sprites must have the same // neither in a fast way. So instead let's just take the hypothesis that all sprites must have the same
@ -704,10 +738,12 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex)
// SSE optimization: shuffle m[1] to have (4*32 bits) X, Y, U, V // SSE optimization: shuffle m[1] to have (4*32 bits) X, Y, U, V
int first_dpX = v[1].XYZ.X - v[0].XYZ.X; int first_dpX = v[1].XYZ.X - v[0].XYZ.X;
int first_dpU = v[1].U - v[0].U; int first_dpU = v[1].U - v[0].U;
for (size_t i = 0; i < m_vertex.next; i += 2) { for (size_t i = 0; i < m_vertex.next; i += 2)
{
int dpX = v[i + 1].XYZ.X - v[i].XYZ.X; int dpX = v[i + 1].XYZ.X - v[i].XYZ.X;
int dpU = v[i + 1].U - v[i].U; int dpU = v[i + 1].U - v[i].U;
if (dpX != first_dpX || dpU != first_dpU) { if (dpX != first_dpX || dpU != first_dpU)
{
is_paving = false; is_paving = false;
break; break;
} }
@ -720,7 +756,8 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex)
GL_INS("PP SAMPLER: Dp %f %f Dt %f %f. Is blit %d, is paving %d, count %d", delta_p.x, delta_p.y, delta_t.x, delta_t.y, is_blit, is_paving, m_vertex.tail); GL_INS("PP SAMPLER: Dp %f %f Dt %f %f. Is blit %d, is paving %d, count %d", delta_p.x, delta_p.y, delta_t.x, delta_t.y, is_blit, is_paving, m_vertex.tail);
#endif #endif
if (is_paving) { if (is_paving)
{
// Replace all sprite with a single fullscreen sprite. // Replace all sprite with a single fullscreen sprite.
GSVertex* s = &m_vertex.buff[0]; GSVertex* s = &m_vertex.buff[0];
@ -752,7 +789,8 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
{ {
// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM); // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);
if(clut) return; // FIXME if (clut)
return; // FIXME
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
} }
@ -1035,7 +1073,8 @@ void GSRendererHW::RoundSpriteOffset()
size_t count = m_vertex.next; size_t count = m_vertex.next;
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
for(size_t i = 0; i < count; i += 2) { for (size_t i = 0; i < count; i += 2)
{
// Performance note: if it had any impact on perf, someone would port it to SSE (AKA GSVector) // Performance note: if it had any impact on perf, someone would port it to SSE (AKA GSVector)
// Compute the coordinate of first and last texels (in native with a linear filtering) // Compute the coordinate of first and last texels (in native with a linear filtering)
@ -1048,7 +1087,8 @@ void GSRendererHW::RoundSpriteOffset()
uint16 tx0 = Interpolate_UV(ax0, v[i].U, v[i + 1].U); uint16 tx0 = Interpolate_UV(ax0, v[i].U, v[i + 1].U);
uint16 tx1 = Interpolate_UV(ax1, v[i].U, v[i + 1].U); uint16 tx1 = Interpolate_UV(ax1, v[i].U, v[i + 1].U);
#ifdef DEBUG_U #ifdef DEBUG_U
if (debug) { if (debug)
{
fprintf(stderr, "u0:%d and u1:%d\n", v[i].U, v[i + 1].U); fprintf(stderr, "u0:%d and u1:%d\n", v[i].U, v[i + 1].U);
fprintf(stderr, "a0:%f and a1:%f\n", ax0, ax1); fprintf(stderr, "a0:%f and a1:%f\n", ax0, ax1);
fprintf(stderr, "t0:%d and t1:%d\n", tx0, tx1); fprintf(stderr, "t0:%d and t1:%d\n", tx0, tx1);
@ -1064,7 +1104,8 @@ void GSRendererHW::RoundSpriteOffset()
uint16 ty0 = Interpolate_UV(ay0, v[i].V, v[i + 1].V); uint16 ty0 = Interpolate_UV(ay0, v[i].V, v[i + 1].V);
uint16 ty1 = Interpolate_UV(ay1, v[i].V, v[i + 1].V); uint16 ty1 = Interpolate_UV(ay1, v[i].V, v[i + 1].V);
#ifdef DEBUG_V #ifdef DEBUG_V
if (debug) { if (debug)
{
fprintf(stderr, "v0:%d and v1:%d\n", v[i].V, v[i + 1].V); fprintf(stderr, "v0:%d and v1:%d\n", v[i].V, v[i + 1].V);
fprintf(stderr, "a0:%f and a1:%f\n", ay0, ay1); fprintf(stderr, "a0:%f and a1:%f\n", ay0, ay1);
fprintf(stderr, "t0:%d and t1:%d\n", ty0, ty1); fprintf(stderr, "t0:%d and t1:%d\n", ty0, ty1);
@ -1088,33 +1129,47 @@ void GSRendererHW::RoundSpriteOffset()
// of interpolation migth trigger a discard (with alpha testing) // of interpolation migth trigger a discard (with alpha testing)
// Let's use something simple that correct really bad case (for a couple of 2D games). // Let's use something simple that correct really bad case (for a couple of 2D games).
// I hope it won't create too much glitches. // I hope it won't create too much glitches.
if (linear) { if (linear)
{
int Lu = v[i + 1].U - v[i].U; int Lu = v[i + 1].U - v[i].U;
// Note 32 is based on taisho-mononoke // Note 32 is based on taisho-mononoke
if ((Lu > 0) && (Lu <= (Lx+32))) { if ((Lu > 0) && (Lu <= (Lx + 32)))
{
v[i + 1].U -= 8; v[i + 1].U -= 8;
} }
} else { }
if (tx0 <= tx1) { else
{
if (tx0 <= tx1)
{
v[i].U = tx0; v[i].U = tx0;
v[i + 1].U = tx1 + 16; v[i + 1].U = tx1 + 16;
} else { }
else
{
v[i].U = tx0 + 15; v[i].U = tx0 + 15;
v[i + 1].U = tx1; v[i + 1].U = tx1;
} }
} }
#endif #endif
#if 1 #if 1
if (linear) { if (linear)
{
int Lv = v[i + 1].V - v[i].V; int Lv = v[i + 1].V - v[i].V;
if ((Lv > 0) && (Lv <= (Ly+32))) { if ((Lv > 0) && (Lv <= (Ly + 32)))
{
v[i + 1].V -= 8; v[i + 1].V -= 8;
} }
} else { }
if (ty0 <= ty1) { else
{
if (ty0 <= ty1)
{
v[i].V = ty0; v[i].V = ty0;
v[i + 1].V = ty1 + 16; v[i + 1].V = ty1 + 16;
} else { }
else
{
v[i].V = ty0 + 15; v[i].V = ty0 + 15;
v[i + 1].V = ty1; v[i + 1].V = ty1;
} }
@ -1129,13 +1184,13 @@ void GSRendererHW::RoundSpriteOffset()
if (debug) if (debug)
fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].V, v[i + 1].V); fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].V, v[i + 1].V);
#endif #endif
} }
} }
void GSRendererHW::Draw() void GSRendererHW::Draw()
{ {
if(m_dev->IsLost() || IsBadFrame()) { if (m_dev->IsLost() || IsBadFrame())
{
GL_INS("Warning skipping a draw call (%d)", s_n); GL_INS("Warning skipping a draw call (%d)", s_n);
return; return;
} }
@ -1187,22 +1242,31 @@ void GSRendererHW::Draw()
const GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p; const GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p;
bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f); bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f);
if (m_channel_shuffle) { if (m_channel_shuffle)
{
m_channel_shuffle = draw_sprite_tex && (m_context->TEX0.PSM == PSM_PSMT8) && single_page; m_channel_shuffle = draw_sprite_tex && (m_context->TEX0.PSM == PSM_PSMT8) && single_page;
if (m_channel_shuffle) { if (m_channel_shuffle)
{
GL_CACHE("Channel shuffle effect detected SKIP"); GL_CACHE("Channel shuffle effect detected SKIP");
return; return;
} }
} else if (draw_sprite_tex && m_context->FRAME.Block() == m_context->TEX0.TBP0) { }
else if (draw_sprite_tex && m_context->FRAME.Block() == m_context->TEX0.TBP0)
{
// Special post-processing effect // Special post-processing effect
if ((m_context->TEX0.PSM == PSM_PSMT8) && single_page) { if ((m_context->TEX0.PSM == PSM_PSMT8) && single_page)
{
GL_INS("Channel shuffle effect detected"); GL_INS("Channel shuffle effect detected");
m_channel_shuffle = true; m_channel_shuffle = true;
} else { }
else
{
GL_DBG("Special post-processing effect not supported"); GL_DBG("Special post-processing effect not supported");
m_channel_shuffle = false; m_channel_shuffle = false;
} }
} else { }
else
{
m_channel_shuffle = false; m_channel_shuffle = false;
} }
@ -1214,7 +1278,8 @@ void GSRendererHW::Draw()
GSTextureCache::Target* rt = NULL; GSTextureCache::Target* rt = NULL;
GSTexture* rt_tex = NULL; GSTexture* rt_tex = NULL;
if (!no_rt) { if (!no_rt)
{
rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true, fm); rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true, fm);
rt_tex = rt->m_texture; rt_tex = rt->m_texture;
} }
@ -1225,7 +1290,8 @@ void GSRendererHW::Draw()
GSTextureCache::Target* ds = NULL; GSTextureCache::Target* ds = NULL;
GSTexture* ds_tex = NULL; GSTexture* ds_tex = NULL;
if (!no_ds) { if (!no_ds)
{
ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
ds_tex = ds->m_texture; ds_tex = ds->m_texture;
} }
@ -1240,33 +1306,42 @@ void GSRendererHW::Draw()
m_lod = GSVector2i(0, 0); m_lod = GSVector2i(0, 0);
// Code from the SW renderer // Code from the SW renderer
if (IsMipMapActive()) { if (IsMipMapActive())
{
int interpolation = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri int interpolation = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
int k = (m_context->TEX1.K + 8) >> 4; int k = (m_context->TEX1.K + 8) >> 4;
int lcm = m_context->TEX1.LCM; int lcm = m_context->TEX1.LCM;
if ((int)m_vt.m_lod.x >= mxl) { if ((int)m_vt.m_lod.x >= mxl)
{
k = mxl; // set lod to max level k = mxl; // set lod to max level
lcm = 1; // constant lod lcm = 1; // constant lod
} }
if (PRIM->FST) { if (PRIM->FST)
{
ASSERT(lcm == 1); ASSERT(lcm == 1);
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
lcm = 1; lcm = 1;
} }
if (lcm == 1) { if (lcm == 1)
{
m_lod.x = std::max<int>(k, 0); m_lod.x = std::max<int>(k, 0);
m_lod.y = m_lod.x; m_lod.y = m_lod.x;
} else { }
else
{
// Not constant but who care ! // Not constant but who care !
if (interpolation == 2) { if (interpolation == 2)
{
// Mipmap Linear. Both layers are sampled, only take the big one // Mipmap Linear. Both layers are sampled, only take the big one
m_lod.x = std::max<int>((int)floor(m_vt.m_lod.x), 0); m_lod.x = std::max<int>((int)floor(m_vt.m_lod.x), 0);
} else { }
else
{
// On GS lod is a fixed float number 7:4 (4 bit for the frac part) // On GS lod is a fixed float number 7:4 (4 bit for the frac part)
#if 0 #if 0
m_lod.x = std::max<int>((int)round(m_vt.m_lod.x + 0.0625), 0); m_lod.x = std::max<int>((int)round(m_vt.m_lod.x + 0.0625), 0);
@ -1294,13 +1369,16 @@ void GSRendererHW::Draw()
MIP_CLAMP.MAXU >>= m_lod.x; MIP_CLAMP.MAXU >>= m_lod.x;
MIP_CLAMP.MAXV >>= m_lod.x; MIP_CLAMP.MAXV >>= m_lod.x;
for (int i = 0; i < m_lod.x; i++) { for (int i = 0; i < m_lod.x; i++)
{
m_vt.m_min.t *= 0.5f; m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f; m_vt.m_max.t *= 0.5f;
} }
GL_CACHE("Mipmap LOD %d %d (%f %f) new size %dx%d (K %d L %u)", m_lod.x, m_lod.y, m_vt.m_lod.x, m_vt.m_lod.y, 1 << TEX0.TW, 1 << TEX0.TH, m_context->TEX1.K, m_context->TEX1.L); GL_CACHE("Mipmap LOD %d %d (%f %f) new size %dx%d (K %d L %u)", m_lod.x, m_lod.y, m_vt.m_lod.x, m_vt.m_lod.y, 1 << TEX0.TW, 1 << TEX0.TH, m_context->TEX1.K, m_context->TEX1.L);
} else { }
else
{
TEX0 = GetTex0Layer(0); TEX0 = GetTex0Layer(0);
} }
@ -1313,12 +1391,14 @@ void GSRendererHW::Draw()
m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, r) : m_tc->LookupSource(TEX0, env.TEXA, r); m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, r) : m_tc->LookupSource(TEX0, env.TEXA, r);
// Round 2 // Round 2
if (IsMipMapActive() && m_mipmap == 2 && !tex_psm.depth) { if (IsMipMapActive() && m_mipmap == 2 && !tex_psm.depth)
{
// Upload remaining texture layers // Upload remaining texture layers
GSVector4 tmin = m_vt.m_min.t; GSVector4 tmin = m_vt.m_min.t;
GSVector4 tmax = m_vt.m_max.t; GSVector4 tmax = m_vt.m_max.t;
for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++) { for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++)
{
const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(layer); const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(layer);
m_context->offset.tex = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM); m_context->offset.tex = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM);
@ -1348,7 +1428,8 @@ void GSRendererHW::Draw()
&& draw_sprite_tex && m_src->m_32_bits_fmt; && draw_sprite_tex && m_src->m_32_bits_fmt;
// Okami mustn't call this code // Okami mustn't call this code
if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && (m_context->FRAME.FBMSK == 0)) { if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && (m_context->FRAME.FBMSK == 0))
{
// Avious dubious call to m_texture_shuffle on 16 bits games // Avious dubious call to m_texture_shuffle on 16 bits games
// The pattern is severals column of 8 pixels. A single sprite // The pattern is severals column of 8 pixels. A single sprite
// smell fishy but a big sprite is wrong. // smell fishy but a big sprite is wrong.
@ -1373,14 +1454,18 @@ void GSRendererHW::Draw()
// Texture shuffle is not yet supported with strange clamp mode // Texture shuffle is not yet supported with strange clamp mode
ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3)); ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3));
if (m_src->m_target && m_context->TEX0.PSM == PSM_PSMT8 && single_page && draw_sprite_tex) { if (m_src->m_target && m_context->TEX0.PSM == PSM_PSMT8 && single_page && draw_sprite_tex)
{
GL_INS("Channel shuffle effect detected (2nd shot)"); GL_INS("Channel shuffle effect detected (2nd shot)");
m_channel_shuffle = true; m_channel_shuffle = true;
} else { }
else
{
m_channel_shuffle = false; m_channel_shuffle = false;
} }
} }
if (rt) { if (rt)
{
// Be sure texture shuffle detection is properly propagated // Be sure texture shuffle detection is properly propagated
// Otherwise set or clear the flag (Code in texture cache only set the flag) // Otherwise set or clear the flag (Code in texture cache only set the flag)
// Note: it is important to clear the flag when RT is used as a real 16 bits target. // Note: it is important to clear the flag when RT is used as a real 16 bits target.
@ -1393,7 +1478,8 @@ void GSRendererHW::Draw()
std::string s; std::string s;
if (s_n >= s_saven) { if (s_n >= s_saven)
{
// Dump Register state // Dump Register state
s = format("%05d_context.txt", s_n); s = format("%05d_context.txt", s_n);
@ -1434,7 +1520,6 @@ void GSRendererHW::Draw()
if (ds_tex) if (ds_tex)
ds_tex->Save(m_dump_root + s); ds_tex->Save(m_dump_root + s);
} }
} }
// The rectangle of the draw // The rectangle of the draw
@ -1446,12 +1531,14 @@ void GSRendererHW::Draw()
return; return;
} }
if (!OI_BlitFMV(rt, m_src, m_r)) { if (!OI_BlitFMV(rt, m_src, m_r))
{
GL_INS("Warning skipping a draw call (%d)", s_n); GL_INS("Warning skipping a draw call (%d)", s_n);
return; return;
} }
if (m_userhacks_enabled_gs_mem_clear) { if (m_userhacks_enabled_gs_mem_clear)
{
// Constant Direct Write without texture/test/blending (aka a GS mem clear) // Constant Direct Write without texture/test/blending (aka a GS mem clear)
if ((m_vt.m_primclass == GS_SPRITE_CLASS) && !PRIM->TME // Direct write if ((m_vt.m_primclass == GS_SPRITE_CLASS) && !PRIM->TME // Direct write
&& (!PRIM->ABE || m_context->ALPHA.IsOpaque()) // No transparency && (!PRIM->ABE || m_context->ALPHA.IsOpaque()) // No transparency
@ -1470,23 +1557,27 @@ void GSRendererHW::Draw()
// A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite // A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite
// Note: first hack corrects both position and texture coordinate // Note: first hack corrects both position and texture coordinate
// Note: second hack corrects only the texture coordinate // Note: second hack corrects only the texture coordinate
if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS))
{
size_t count = m_vertex.next; size_t count = m_vertex.next;
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
// Hack to avoid vertical black line in various games (ace combat/tekken) // Hack to avoid vertical black line in various games (ace combat/tekken)
if (m_userhacks_align_sprite_X) { if (m_userhacks_align_sprite_X)
{
// Note for performance reason I do the check only once on the first // Note for performance reason I do the check only once on the first
// primitive // primitive
int win_position = v[1].XYZ.X - context->XYOFFSET.OFX; int win_position = v[1].XYZ.X - context->XYOFFSET.OFX;
const bool unaligned_position = ((win_position & 0xF) == 8); const bool unaligned_position = ((win_position & 0xF) == 8);
const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful
const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X); const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X);
if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) { if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST))
{
// Normaly vertex are aligned on full pixels and texture in half // Normaly vertex are aligned on full pixels and texture in half
// pixels. Let's extend the coverage of an half-pixel to avoid // pixels. Let's extend the coverage of an half-pixel to avoid
// hole after upscaling // hole after upscaling
for(size_t i = 0; i < count; i += 2) { for (size_t i = 0; i < count; i += 2)
{
v[i + 1].XYZ.X += 8; v[i + 1].XYZ.X += 8;
// I really don't know if it is a good idea. Neither what to do for !PRIM->FST // I really don't know if it is a good idea. Neither what to do for !PRIM->FST
if (unaligned_texture) if (unaligned_texture)
@ -1496,14 +1587,18 @@ void GSRendererHW::Draw()
} }
// Noting to do if no texture is sampled // Noting to do if no texture is sampled
if (PRIM->FST && draw_sprite_tex) { if (PRIM->FST && draw_sprite_tex)
if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) { {
if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear()))
{
if (m_vt.IsLinear()) if (m_vt.IsLinear())
RoundSpriteOffset<true>(); RoundSpriteOffset<true>();
else else
RoundSpriteOffset<false>(); RoundSpriteOffset<false>();
} }
} else { }
else
{
; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior) ; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior)
} }
} }
@ -1522,10 +1617,12 @@ void GSRendererHW::Draw()
// Help to detect rendering outside of the framebuffer // Help to detect rendering outside of the framebuffer
#if _DEBUG #if _DEBUG
if (m_upscale_multiplier * m_r.z > m_width) { if (m_upscale_multiplier * m_r.z > m_width)
{
GL_INS("ERROR: RT width is too small only %d but require %d", m_width, m_upscale_multiplier * m_r.z); GL_INS("ERROR: RT width is too small only %d but require %d", m_width, m_upscale_multiplier * m_r.z);
} }
if (m_upscale_multiplier * m_r.w > m_height) { if (m_upscale_multiplier * m_r.w > m_height)
{
GL_INS("ERROR: RT height is too small only %d but require %d", m_height, m_upscale_multiplier * m_r.w); GL_INS("ERROR: RT height is too small only %d but require %d", m_height, m_upscale_multiplier * m_r.w);
} }
#endif #endif
@ -1629,7 +1726,8 @@ void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
m_oo = m_oo_map[hash]; m_oo = m_oo_map[hash];
m_cu = m_cu_map[hash]; m_cu = m_cu_map[hash];
if (game.flags & CRC::PointListPalette) { if (game.flags & CRC::PointListPalette)
{
ASSERT(m_oi == NULL); ASSERT(m_oi == NULL);
m_oi = &GSRendererHW::OI_PointListPalette; m_oi = &GSRendererHW::OI_PointListPalette;
@ -1644,7 +1742,8 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds)
// Note gs mem clear must be tested before calling this function // Note gs mem clear must be tested before calling this function
// Limit further to unmask Z write // Limit further to unmask Z write
if (!m_context->ZBUF.ZMSK && rt && ds) { if (!m_context->ZBUF.ZMSK && rt && ds)
{
const GSVertex* v = &m_vertex.buff[0]; const GSVertex* v = &m_vertex.buff[0];
const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
//const GSLocalMemory::psm_t& depth_psm = GSLocalMemory::m_psm[m_context->ZBUF.PSM]; //const GSLocalMemory::psm_t& depth_psm = GSLocalMemory::m_psm[m_context->ZBUF.PSM];
@ -1665,16 +1764,20 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds)
// Frame and depth pointer can be inverted // Frame and depth pointer can be inverted
uint32 base; uint32 base;
uint32 half; uint32 half;
if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) { if (m_context->FRAME.FBP > m_context->ZBUF.ZBP)
{
base = m_context->ZBUF.ZBP; base = m_context->ZBUF.ZBP;
half = m_context->FRAME.FBP; half = m_context->FRAME.FBP;
} else { }
else
{
base = m_context->FRAME.FBP; base = m_context->FRAME.FBP;
half = m_context->ZBUF.ZBP; half = m_context->ZBUF.ZBP;
} }
// If both buffers are side by side we can expect a fast clear in on-going // If both buffers are side by side we can expect a fast clear in on-going
if (half <= (base + written_pages)) { if (half <= (base + written_pages))
{
uint32 color = v[1].RGBAQ.u32[0]; uint32 color = v[1].RGBAQ.u32[0];
bool clear_depth = (m_context->FRAME.FBP > m_context->ZBUF.ZBP); bool clear_depth = (m_context->FRAME.FBP > m_context->ZBUF.ZBP);
@ -1686,11 +1789,14 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds)
GSVector4i commitRect = ComputeBoundingBox(t->GetScale(), t->GetSize()); GSVector4i commitRect = ComputeBoundingBox(t->GetScale(), t->GetSize());
t->CommitRegion(GSVector2i(commitRect.z, 2 * commitRect.w)); t->CommitRegion(GSVector2i(commitRect.z, 2 * commitRect.w));
if (clear_depth) { if (clear_depth)
{
// Only pure clear are supported for depth // Only pure clear are supported for depth
ASSERT(color == 0); ASSERT(color == 0);
m_dev->ClearDepth(t); m_dev->ClearDepth(t);
} else { }
else
{
m_dev->ClearRenderTarget(t, color); m_dev->ClearRenderTarget(t, color);
} }
} }
@ -1703,7 +1809,8 @@ void GSRendererHW::OI_GsMemClear()
// Note gs mem clear must be tested before calling this function // Note gs mem clear must be tested before calling this function
// Limit it further to a full screen 0 write // Limit it further to a full screen 0 write
if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0))) { if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0)))
{
GSOffset* off = m_context->offset.fb; GSOffset* off = m_context->offset.fb;
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in)); GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
// Limit the hack to a single fullscreen clear. Some games might use severals column to clear a screen // Limit the hack to a single fullscreen clear. Some games might use severals column to clear a screen
@ -1717,7 +1824,8 @@ void GSRendererHW::OI_GsMemClear()
// FIXME: loop can likely be optimized with AVX/SSE. Pixels aren't // FIXME: loop can likely be optimized with AVX/SSE. Pixels aren't
// linear but the value will be done for all pixels of a block. // linear but the value will be done for all pixels of a block.
// FIXME: maybe we could limit the write to the top and bottom row page. // FIXME: maybe we could limit the write to the top and bottom row page.
if (format == 0) { if (format == 0)
{
// Based on WritePixel32 // Based on WritePixel32
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
@ -1729,7 +1837,9 @@ void GSRendererHW::OI_GsMemClear()
d[col[x]] = 0; // Here the constant color d[col[x]] = 0; // Here the constant color
} }
} }
} else if (format == 1) { }
else if (format == 1)
{
// Based on WritePixel24 // Based on WritePixel24
for (int y = r.top; y < r.bottom; y++) for (int y = r.top; y < r.bottom; y++)
{ {
@ -1741,7 +1851,9 @@ void GSRendererHW::OI_GsMemClear()
d[col[x]] &= 0xff000000; // Clear the color d[col[x]] &= 0xff000000; // Clear the color
} }
} }
} else if (format == 2) { }
else if (format == 2)
{
; // Hack is used for FMV which are likely 24/32 bits. Let's keep the for reference ; // Hack is used for FMV which are likely 24/32 bits. Let's keep the for reference
#if 0 #if 0
// Based on WritePixel16 // Based on WritePixel16
@ -1762,7 +1874,8 @@ void GSRendererHW::OI_GsMemClear()
bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw)
{ {
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_context->TEX0.TBW > 0) { if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_context->TEX0.TBW > 0)
{
GL_PUSH("OI_BlitFMV"); GL_PUSH("OI_BlitFMV");
GL_INS("OI_BlitFMV"); GL_INS("OI_BlitFMV");
@ -1800,7 +1913,8 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc
// Do the blit. With a Copy mess to avoid issue with limited API (dx) // Do the blit. With a Copy mess to avoid issue with limited API (dx)
// m_dev->StretchRect(tex->m_texture, sRect, tex->m_texture, dRect); // m_dev->StretchRect(tex->m_texture, sRect, tex->m_texture, dRect);
GSVector4i r_full(0, 0, tw, th); GSVector4i r_full(0, 0, tw, th);
if (GSTexture* rt = m_dev->CreateRenderTarget(tw, th)) { if (GSTexture* rt = m_dev->CreateRenderTarget(tw, th))
{
m_dev->CopyRect(tex->m_texture, rt, r_full); m_dev->CopyRect(tex->m_texture, rt, r_full);
m_dev->StretchRect(tex->m_texture, sRect, rt, dRect); m_dev->StretchRect(tex->m_texture, sRect, rt, dRect);
@ -1891,7 +2005,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
{ {
// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454
if(!video) video = new uint32[512 * 512]; if (!video)
video = new uint32[512 * 512];
int ox = m_context->XYOFFSET.OFX - 8; int ox = m_context->XYOFFSET.OFX - 8;
int oy = m_context->XYOFFSET.OFY - 8; int oy = m_context->XYOFFSET.OFY - 8;
@ -1903,7 +2018,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
int x = (v->XYZ.X - ox) >> 4; int x = (v->XYZ.X - ox) >> 4;
int y = (v->XYZ.Y - oy) >> 4; int y = (v->XYZ.Y - oy) >> 4;
if (x < 0 || x >= 448 || y < 0 || y >= (int)lines) return false; // le sigh if (x < 0 || x >= 448 || y < 0 || y >= (int)lines)
return false; // le sigh
video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0]; video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0];
} }
@ -2194,7 +2310,8 @@ bool GSRendererHW::OI_ArTonelico2(GSTexture* rt, GSTexture* ds, GSTextureCache::
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
if (m_vertex.next == 2 && !PRIM->TME && m_context->FRAME.FBW == 10 && v->XYZ.Z == 0 && m_context->TEST.ZTST == ZTST_ALWAYS) { if (m_vertex.next == 2 && !PRIM->TME && m_context->FRAME.FBW == 10 && v->XYZ.Z == 0 && m_context->TEST.ZTST == ZTST_ALWAYS)
{
GL_INS("OI_ArTonelico2"); GL_INS("OI_ArTonelico2");
if (ds) if (ds)
ds->Commit(); // Don't bother to save few MB for a single game ds->Commit(); // Don't bother to save few MB for a single game

View File

@ -73,7 +73,8 @@ private:
class Hacks class Hacks
{ {
template<class T> class HackEntry template <class T>
class HackEntry
{ {
public: public:
CRC::Title title; CRC::Title title;
@ -88,7 +89,8 @@ private:
} }
}; };
template<class T> class FunctionMap : public GSFunctionMap<uint32, T> template <class T>
class FunctionMap : public GSFunctionMap<uint32, T>
{ {
std::list<HackEntry<T>>& m_tbl; std::list<HackEntry<T>>& m_tbl;
@ -109,7 +111,10 @@ private:
} }
public: public:
FunctionMap(std::list<HackEntry<T> >& tbl) : m_tbl(tbl) {} FunctionMap(std::list<HackEntry<T>>& tbl)
: m_tbl(tbl)
{
}
}; };
std::list<HackEntry<OI_Ptr>> m_oi_list; std::list<HackEntry<OI_Ptr>> m_oi_list;
@ -139,7 +144,8 @@ private:
void SwSpriteRender(); void SwSpriteRender();
bool CanUseSwSpriteRender(bool allow_64x64_sprite); bool CanUseSwSpriteRender(bool allow_64x64_sprite);
template <bool linear> void RoundSpriteOffset(); template <bool linear>
void RoundSpriteOffset();
protected: protected:
GSTextureCache* m_tc; GSTextureCache* m_tc;

View File

@ -31,7 +31,8 @@ GSTextureCache::GSTextureCache(GSRenderer* r)
: m_renderer(r) : m_renderer(r)
, m_palette_map(r) , m_palette_map(r)
{ {
if (theApp.GetConfigB("UserHacks")) { if (theApp.GetConfigB("UserHacks"))
{
UserHacks_HalfPixelOffset = theApp.GetConfigI("UserHacks_HalfPixelOffset") == 1; UserHacks_HalfPixelOffset = theApp.GetConfigI("UserHacks_HalfPixelOffset") == 1;
m_preload_frame = theApp.GetConfigB("preload_frame_with_gs_data"); m_preload_frame = theApp.GetConfigB("preload_frame_with_gs_data");
m_disable_partial_invalidation = theApp.GetConfigB("UserHacks_DisablePartialInvalidation"); m_disable_partial_invalidation = theApp.GetConfigB("UserHacks_DisablePartialInvalidation");
@ -39,7 +40,9 @@ GSTextureCache::GSTextureCache(GSRenderer* r)
m_cpu_fb_conversion = theApp.GetConfigB("UserHacks_CPU_FB_Conversion"); m_cpu_fb_conversion = theApp.GetConfigB("UserHacks_CPU_FB_Conversion");
m_texture_inside_rt = theApp.GetConfigB("UserHacks_TextureInsideRt"); m_texture_inside_rt = theApp.GetConfigB("UserHacks_TextureInsideRt");
m_wrap_gs_mem = theApp.GetConfigB("wrap_gs_mem"); m_wrap_gs_mem = theApp.GetConfigB("wrap_gs_mem");
} else { }
else
{
UserHacks_HalfPixelOffset = false; UserHacks_HalfPixelOffset = false;
m_preload_frame = false; m_preload_frame = false;
m_disable_partial_invalidation = false; m_disable_partial_invalidation = false;
@ -77,7 +80,8 @@ void GSTextureCache::RemovePartial()
for (int type = 0; type < 2; type++) for (int type = 0; type < 2; type++)
{ {
for (auto t : m_dst[type]) delete t; for (auto t : m_dst[type])
delete t;
m_dst[type].clear(); m_dst[type].clear();
} }
@ -89,7 +93,8 @@ void GSTextureCache::RemoveAll()
for (int type = 0; type < 2; type++) for (int type = 0; type < 2; type++)
{ {
for (auto t : m_dst[type]) delete t; for (auto t : m_dst[type])
delete t;
m_dst[type].clear(); m_dst[type].clear();
} }
@ -99,13 +104,17 @@ void GSTextureCache::RemoveAll()
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette) GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette)
{ {
if (!m_can_convert_depth) { if (!m_can_convert_depth)
{
GL_CACHE("LookupDepthSource not supported (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM); GL_CACHE("LookupDepthSource not supported (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM);
if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos) { if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos)
{
// JackieChan and SVCChaos cause regressions when skipping the draw calls when depth is disabled/not supported. // JackieChan and SVCChaos cause regressions when skipping the draw calls when depth is disabled/not supported.
// This way we make sure there are no regressions on D3D as well. // This way we make sure there are no regressions on D3D as well.
return LookupSource(TEX0, TEXA, r); return LookupSource(TEX0, TEXA, r);
} else { }
else
{
throw GSDXRecoverableError(); throw GSDXRecoverableError();
} }
} }
@ -119,24 +128,30 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
uint32 bp = TEX0.TBP0; uint32 bp = TEX0.TBP0;
uint32 psm = TEX0.PSM; uint32 psm = TEX0.PSM;
for(auto t : m_dst[DepthStencil]) { for (auto t : m_dst[DepthStencil])
{
if (t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) if (t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{ {
ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth);
if (t->m_age == 0) { if (t->m_age == 0)
{
// Perfect Match // Perfect Match
dst = t; dst = t;
break; break;
} else if (t->m_age == 1) { }
else if (t->m_age == 1)
{
// Better than nothing (Full Spectrum Warrior) // Better than nothing (Full Spectrum Warrior)
dst = t; dst = t;
} }
} }
} }
if (!dst) { if (!dst)
{
// Retry on the render target (Silent Hill 4) // Retry on the render target (Silent Hill 4)
for(auto t : m_dst[RenderTarget]) { for (auto t : m_dst[RenderTarget])
{
// FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ??? // FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ???
if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{ {
@ -147,7 +162,8 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
} }
} }
if (dst) { if (dst)
{
GL_CACHE("TC depth: dst %s hit: %d (0x%x, %s)", to_string(dst->m_type), GL_CACHE("TC depth: dst %s hit: %d (0x%x, %s)", to_string(dst->m_type),
dst->m_texture ? dst->m_texture->GetID() : 0, dst->m_texture ? dst->m_texture->GetID() : 0,
TEX0.TBP0, psm_str(psm)); TEX0.TBP0, psm_str(psm));
@ -167,12 +183,15 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
// texture cache list. It means that a new Source is created everytime we need it. // texture cache list. It means that a new Source is created everytime we need it.
// If it is too expensive, one could cut memory allocation in Source constructor for this // If it is too expensive, one could cut memory allocation in Source constructor for this
// use case. // use case.
if (palette) { if (palette)
{
AttachPaletteToSource(src, psm_s.pal, true); AttachPaletteToSource(src, psm_s.pal, true);
} }
m_src.m_surfaces.insert(src); m_src.m_surfaces.insert(src);
} else { }
else
{
GL_CACHE("TC depth: ERROR miss (0x%x, %s)", TEX0.TBP0, psm_str(psm)); GL_CACHE("TC depth: ERROR miss (0x%x, %s)", TEX0.TBP0, psm_str(psm));
// Possible ? In this case we could call LookupSource // Possible ? In this case we could call LookupSource
// Or just put a basic texture // Or just put a basic texture
@ -182,10 +201,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
// Note: might worth to check previous frame // Note: might worth to check previous frame
// Note: otherwise return NULL and skip the draw // Note: otherwise return NULL and skip the draw
if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos) { if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos)
{
// JackieChan and SVCChaos cause regressions when skipping the draw calls so we reuse the old code for these two. // JackieChan and SVCChaos cause regressions when skipping the draw calls so we reuse the old code for these two.
return LookupSource(TEX0, TEXA, r); return LookupSource(TEX0, TEXA, r);
} else { }
else
{
// Full Spectrum Warrior: first draw call of cut-scene rendering // Full Spectrum Warrior: first draw call of cut-scene rendering
// The game tries to emulate a texture shuffle with an old depth buffer // The game tries to emulate a texture shuffle with an old depth buffer
// (don't exists yet for us due to the cache) // (don't exists yet for us due to the cache)
@ -222,7 +244,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
continue; continue;
// Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check // Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
if (!s->m_target) { if (!s->m_target)
{
// We request a palette texture (psm_s.pal). If the texture was // We request a palette texture (psm_s.pal). If the texture was
// converted by the CPU (!s->m_palette), we need to ensure // converted by the CPU (!s->m_palette), we need to ensure
// palette content is the same. // palette content is the same.
@ -269,8 +292,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
bool texture_inside_rt = ShallSearchTextureInsideRt(); bool texture_inside_rt = ShallSearchTextureInsideRt();
for(auto t : m_dst[RenderTarget]) { for (auto t : m_dst[RenderTarget])
if(t->m_used && t->m_dirty.empty()) { {
if (t->m_used && t->m_dirty.empty())
{
// Typical bug (MGS3 blue cloud): // Typical bug (MGS3 blue cloud):
// 1/ RT used as 32 bits => alpha channel written // 1/ RT used as 32 bits => alpha channel written
// 2/ RT used as 24 bits => no update of alpha channel // 2/ RT used as 24 bits => no update of alpha channel
@ -280,7 +305,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// Solution: consider the RT as 32 bits if the alpha was used in the past // Solution: consider the RT as 32 bits if the alpha was used in the past
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm))
{
// It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will be slow but // It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will be slow but
// 1/ it just works :) // 1/ it just works :)
// 2/ even with upscaling // 2/ even with upscaling
@ -295,8 +321,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
dst = t; dst = t;
break; break;
}
} else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) { else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM))
{
// Detect half of the render target (fix snow engine game) // Detect half of the render target (fix snow engine game)
// Target Page (8KB) have always a width of 64 pixels // Target Page (8KB) have always a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
@ -304,8 +331,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
dst = t; dst = t;
break; break;
}
} else if (texture_inside_rt && psm == PSM_PSMCT32 && t->m_TEX0.PSM == psm && t->m_TEX0.TBP0 < bp && t->m_end_block >= bp) { else if (texture_inside_rt && psm == PSM_PSMCT32 && t->m_TEX0.PSM == psm && t->m_TEX0.TBP0 < bp && t->m_end_block >= bp)
{
// Only PSMCT32 to limit false hits // Only PSMCT32 to limit false hits
// Check if it is possible to hit with valid <x,y> offset on the given Target // Check if it is possible to hit with valid <x,y> offset on the given Target
@ -404,23 +432,28 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// //
// Sigh... They don't help us. // Sigh... They don't help us.
if (dst == NULL && m_can_convert_depth) { if (dst == NULL && m_can_convert_depth)
{
// Let's try a trick to avoid to use wrongly a depth buffer // Let's try a trick to avoid to use wrongly a depth buffer
// Unfortunately, I don't have any Arc the Lad testcase // Unfortunately, I don't have any Arc the Lad testcase
// //
// 1/ Check only current frame, I guess it is only used as a postprocessing effect // 1/ Check only current frame, I guess it is only used as a postprocessing effect
for(auto t : m_dst[DepthStencil]) { for (auto t : m_dst[DepthStencil])
{
if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{ {
GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled");
// Let's fetch a depth format texture. Rational, it will avoid the texture allocation and the // Let's fetch a depth format texture. Rational, it will avoid the texture allocation and the
// rescaling of the current function. // rescaling of the current function.
if (psm_s.bpp > 8) { if (psm_s.bpp > 8)
{
GIFRegTEX0 depth_TEX0; GIFRegTEX0 depth_TEX0;
depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u); depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u);
depth_TEX0.u32[1] = TEX0.u32[1]; depth_TEX0.u32[1] = TEX0.u32[1];
return LookupDepthSource(depth_TEX0, TEXA, r); return LookupDepthSource(depth_TEX0, TEXA, r);
} else { }
else
{
return LookupDepthSource(TEX0, TEXA, r, true); return LookupDepthSource(TEX0, TEXA, r, true);
} }
} }
@ -433,25 +466,30 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
if (src == NULL) if (src == NULL)
{ {
#ifdef ENABLE_OGL_DEBUG #ifdef ENABLE_OGL_DEBUG
if (dst) { if (dst)
{
GL_CACHE("TC: dst %s hit (%s): %d (0x%x, %s)", to_string(dst->m_type), half_right ? "half" : "full", GL_CACHE("TC: dst %s hit (%s): %d (0x%x, %s)", to_string(dst->m_type), half_right ? "half" : "full",
dst->m_texture ? dst->m_texture->GetID() : 0, dst->m_texture ? dst->m_texture->GetID() : 0,
TEX0.TBP0, psm_str(TEX0.PSM)); TEX0.TBP0, psm_str(TEX0.PSM));
} else { }
else
{
GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM)); GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
} }
#endif #endif
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset); src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset);
new_source = true; new_source = true;
}
} else { else
{
GL_CACHE("TC: src hit: %d (0x%x, 0x%x, %s)", GL_CACHE("TC: src hit: %d (0x%x, 0x%x, %s)",
src->m_texture ? src->m_texture->GetID() : 0, src->m_texture ? src->m_texture->GetID() : 0,
TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0,
psm_str(TEX0.PSM)); psm_str(TEX0.PSM));
} }
if (src->m_palette && !new_source && !src->ClutMatch({ clut, psm_s.pal })) { if (src->m_palette && !new_source && !src->ClutMatch({clut, psm_s.pal}))
{
AttachPaletteToSource(src, psm_s.pal, true); AttachPaletteToSource(src, psm_s.pal, true);
} }
@ -497,7 +535,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
Target* dst = NULL; Target* dst = NULL;
auto& list = m_dst[type]; auto& list = m_dst[type];
for(auto i = list.begin(); i != list.end(); ++i) { for (auto i = list.begin(); i != list.end(); ++i)
{
Target* t = *i; Target* t = *i;
if (bp == t->m_TEX0.TBP0) if (bp == t->m_TEX0.TBP0)
@ -513,32 +552,40 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
} }
} }
if (dst) { if (dst)
{
GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, %s)", to_string(type), w, h, dst->m_texture->GetID(), bp, psm_str(TEX0.PSM)); GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, %s)", to_string(type), w, h, dst->m_texture->GetID(), bp, psm_str(TEX0.PSM));
dst->Update(); dst->Update();
dst->m_dirty_alpha |= (psm_s.trbpp == 32 && (fbmask & 0xFF000000) != 0xFF000000) || (psm_s.trbpp == 16); dst->m_dirty_alpha |= (psm_s.trbpp == 32 && (fbmask & 0xFF000000) != 0xFF000000) || (psm_s.trbpp == 16);
}
} else if (m_can_convert_depth) { else if (m_can_convert_depth)
{
int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil; int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil;
// Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick // Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick
// some bad data. // some bad data.
Target* dst_match = nullptr; Target* dst_match = nullptr;
for(auto t : m_dst[rev_type]) { for (auto t : m_dst[rev_type])
if (bp == t->m_TEX0.TBP0) { {
if (t->m_age == 0) { if (bp == t->m_TEX0.TBP0)
{
if (t->m_age == 0)
{
dst_match = t; dst_match = t;
break; break;
} else if (t->m_age == 1) { }
else if (t->m_age == 1)
{
dst_match = t; dst_match = t;
} }
} }
} }
if (dst_match) { if (dst_match)
{
GSVector4 sRect(0, 0, 1, 1); GSVector4 sRect(0, 0, 1, 1);
GSVector4 dRect(0, 0, w, h); GSVector4 dRect(0, 0, w, h);
@ -547,10 +594,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
int shader; int shader;
bool fmt_16_bits = (psm_s.bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp == 16); bool fmt_16_bits = (psm_s.bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp == 16);
if (type == DepthStencil) { if (type == DepthStencil)
{
GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM)); GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
shader = (fmt_16_bits) ? ShaderConvert_RGB5A1_TO_FLOAT16 : ShaderConvert_RGBA8_TO_FLOAT32 + psm_s.fmt; shader = (fmt_16_bits) ? ShaderConvert_RGB5A1_TO_FLOAT16 : ShaderConvert_RGBA8_TO_FLOAT32 + psm_s.fmt;
} else { }
else
{
GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM)); GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
shader = (fmt_16_bits) ? ShaderConvert_FLOAT16_TO_RGB5A1 : ShaderConvert_FLOAT32_TO_RGBA8; shader = (fmt_16_bits) ? ShaderConvert_FLOAT16_TO_RGB5A1 : ShaderConvert_FLOAT32_TO_RGBA8;
} }
@ -576,7 +626,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
// From a performance point of view, it might cost a little on big upscaling // From a performance point of view, it might cost a little on big upscaling
// but normally few RT are miss so it must remain reasonable. // but normally few RT are miss so it must remain reasonable.
bool supported_fmt = m_can_convert_depth || psm_s.depth == 0; bool supported_fmt = m_can_convert_depth || psm_s.depth == 0;
if (m_preload_frame && TEX0.TBW > 0 && supported_fmt) { if (m_preload_frame && TEX0.TBW > 0 && supported_fmt)
{
GL_INS("Preloading the RT DATA"); GL_INS("Preloading the RT DATA");
// RT doesn't have height but if we use a too big value, we will read outside of the GS memory. // RT doesn't have height but if we use a too big value, we will read outside of the GS memory.
int page0 = TEX0.TBP0 >> 5; int page0 = TEX0.TBP0 >> 5;
@ -587,7 +638,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, max_h), TEX0.PSM)); dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, max_h), TEX0.PSM));
dst->Update(); dst->Update();
} else { }
else
{
#ifdef ENABLE_OGL_DEBUG #ifdef ENABLE_OGL_DEBUG
switch (type) { switch (type) {
case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
@ -620,8 +673,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
#endif #endif
// Let's try to find a perfect frame that contains valid data // Let's try to find a perfect frame that contains valid data
for(auto t : m_dst[RenderTarget]) { for (auto t : m_dst[RenderTarget])
if(bp == t->m_TEX0.TBP0 && t->m_end_block >= bp) { {
if (bp == t->m_TEX0.TBP0 && t->m_end_block >= bp)
{
dst = t; dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM)); GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM));
@ -631,9 +686,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
} }
// 2nd try ! Try to find a frame that include the bp // 2nd try ! Try to find a frame that include the bp
if (dst == NULL) { if (dst == NULL)
for(auto t : m_dst[RenderTarget]) { {
if (t->m_TEX0.TBP0 < bp && bp <= t->m_end_block) { for (auto t : m_dst[RenderTarget])
{
if (t->m_TEX0.TBP0 < bp && bp <= t->m_end_block)
{
dst = t; dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, inclusive hit: %d (0x%x, took 0x%x -> 0x%x %s)", w, h, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block, psm_str(TEX0.PSM)); GL_CACHE("TC: Lookup Frame %dx%d, inclusive hit: %d (0x%x, took 0x%x -> 0x%x %s)", w, h, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block, psm_str(TEX0.PSM));
@ -644,9 +702,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
} }
// 3rd try ! Try to find a frame that doesn't contain valid data (honestly I'm not sure we need to do it) // 3rd try ! Try to find a frame that doesn't contain valid data (honestly I'm not sure we need to do it)
if (dst == NULL) { if (dst == NULL)
for(auto t : m_dst[RenderTarget]) { {
if(bp == t->m_TEX0.TBP0) { for (auto t : m_dst[RenderTarget])
{
if (bp == t->m_TEX0.TBP0)
{
dst = t; dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, empty hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM)); GL_CACHE("TC: Lookup Frame %dx%d, empty hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM));
@ -690,7 +751,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage
if (m_preload_frame) { if (m_preload_frame)
{
// Load GS data into frame. Game can directly uploads a background or the full image in // Load GS data into frame. Game can directly uploads a background or the full image in
// "CTRC" buffer. It will also avoid various black screen issue in gs dump. // "CTRC" buffer. It will also avoid various black screen issue in gs dump.
// //
@ -737,14 +799,14 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
break; break;
} }
} }
} }
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified // Goal: invalidate data sent to the GPU when the source (GS memory) is modified
// Called each time you want to write to the GS memory // Called each time you want to write to the GS memory
void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target) void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target)
{ {
if(!off) return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549. if (!off)
return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
uint32 bp = off->bp; uint32 bp = off->bp;
uint32 bw = off->bw; uint32 bw = off->bw;
@ -768,7 +830,8 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
} }
uint32 bbp = bp + bw * 0x10; uint32 bbp = bp + bw * 0x10;
if (bw >= 16 && bbp < 16384) { if (bw >= 16 && bbp < 16384)
{
// Detect half of the render target (fix snow engine game) // Detect half of the render target (fix snow engine game)
// Target Page (8KB) have always a width of 64 pixels // Target Page (8KB) have always a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
@ -788,13 +851,15 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
// Haunting ground write frame buffer 0x3000 and expect to write data to 0x3380 // Haunting ground write frame buffer 0x3000 and expect to write data to 0x3380
// Note: the game only does a 0 direct write. If some games expect some real data // Note: the game only does a 0 direct write. If some games expect some real data
// we are screwed. // we are screwed.
if (m_renderer->m_game.title == CRC::HauntingGround) { if (m_renderer->m_game.title == CRC::HauntingGround)
{
uint32 end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats uint32 end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats
auto type = RenderTarget; auto type = RenderTarget;
for (auto t : m_dst[type]) for (auto t : m_dst[type])
{ {
if (t->m_TEX0.TBP0 > bp && t->m_end_block <= end_block) { if (t->m_TEX0.TBP0 > bp && t->m_end_block <= end_block)
{
// Haunting ground expect to clean buffer B with a rendering into buffer A. // Haunting ground expect to clean buffer B with a rendering into buffer A.
// Situation is quite messy as it would require to extract the data from the buffer A // Situation is quite messy as it would require to extract the data from the buffer A
// and to move in buffer B. // and to move in buffer B.
@ -880,7 +945,8 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
} }
} }
if(!target) return; if (!target)
return;
for (int type = 0; type < 2; type++) for (int type = 0; type < 2; type++)
{ {
@ -916,7 +982,9 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
delete t; delete t;
continue; continue;
} }
} else if (bp == t->m_TEX0.TBP0) { }
else if (bp == t->m_TEX0.TBP0)
{
// EE writes the ALPHA channel. Mark it as invalid for // EE writes the ALPHA channel. Mark it as invalid for
// the texture cache. Otherwise it will generate a wrong // the texture cache. Otherwise it will generate a wrong
// hit on the texture cache. // hit on the texture cache.
@ -925,7 +993,8 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
} }
// GH: Try to detect texture write that will overlap with a target buffer // GH: Try to detect texture write that will overlap with a target buffer
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM)) { if (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
if (bp < t->m_TEX0.TBP0) if (bp < t->m_TEX0.TBP0)
{ {
uint32 rowsize = bw * 8192; uint32 rowsize = bw * 8192;
@ -955,11 +1024,13 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
// Ben 10 Alien Force : Vilgax Attacks uses a small temporary target for multiple textures (different bw) // Ben 10 Alien Force : Vilgax Attacks uses a small temporary target for multiple textures (different bw)
// It is too complex to handle, and purpose of the code was to handle FMV (large bw). So let's skip small // It is too complex to handle, and purpose of the code was to handle FMV (large bw). So let's skip small
// (128 pixels) target // (128 pixels) target
if (bw > 2 && t->m_TEX0.TBW == bw && t->Inside(bp, bw, psm, rect) && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { if (bw > 2 && t->m_TEX0.TBW == bw && t->Inside(bp, bw, psm, rect) && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{
uint32 rowsize = bw * 8192u; uint32 rowsize = bw * 8192u;
uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256); uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256);
if(rowsize > 0 && offset % rowsize == 0) { if (rowsize > 0 && offset % rowsize == 0)
{
int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
GL_CACHE("TC: Dirty in the middle of Target(%s) %d (0x%x->0x%x) pos(%d,%d => %d,%d) bw:%u", to_string(type), GL_CACHE("TC: Dirty in the middle of Target(%s) %d (0x%x->0x%x) pos(%d,%d => %d,%d) bw:%u", to_string(type),
@ -987,11 +1058,15 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r)
//uint32 bw = off->bw; //uint32 bw = off->bw;
// No depth handling please. // No depth handling please.
if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S) { if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S)
{
GL_INS("ERROR: InvalidateLocalMem depth format isn't supported (%d,%d to %d,%d)", r.x, r.y, r.z, r.w); GL_INS("ERROR: InvalidateLocalMem depth format isn't supported (%d,%d to %d,%d)", r.x, r.y, r.z, r.w);
if (m_can_convert_depth) { if (m_can_convert_depth)
for(auto t : m_dst[DepthStencil]) { {
if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { for (auto t : m_dst[DepthStencil])
{
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
if (GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) if (GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
Read(t, r.rintersect(t->m_valid)); Read(t, r.rintersect(t->m_valid));
} }
@ -1020,16 +1095,21 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r)
// note: r.rintersect breaks Wizardry and Chaos Legion // note: r.rintersect breaks Wizardry and Chaos Legion
// Read(t, t->m_valid) works in all tested games but is very slow in GUST titles >< // Read(t, t->m_valid) works in all tested games but is very slow in GUST titles ><
if (GSTextureCache::m_disable_partial_invalidation) { if (GSTextureCache::m_disable_partial_invalidation)
{
Read(t, r.rintersect(t->m_valid)); Read(t, r.rintersect(t->m_valid));
} else { }
else
{
if (r.x == 0 && r.y == 0) // Full screen read? if (r.x == 0 && r.y == 0) // Full screen read?
Read(t, t->m_valid); Read(t, t->m_valid);
else // Block level read? else // Block level read?
Read(t, r.rintersect(t->m_valid)); Read(t, r.rintersect(t->m_valid));
} }
} }
} else { }
else
{
GL_INS("ERROR: InvalidateLocalMem target is a depth format"); GL_INS("ERROR: InvalidateLocalMem target is a depth format");
} }
} }
@ -1115,17 +1195,20 @@ void GSTextureCache::InvalidateVideoMemSubTarget(GSTextureCache::Target* rt)
auto& list = m_dst[RenderTarget]; auto& list = m_dst[RenderTarget];
for(auto i = list.begin(); i != list.end(); ) { for (auto i = list.begin(); i != list.end();)
{
Target* t = *i; Target* t = *i;
if((t->m_TEX0.TBP0 > rt->m_TEX0.TBP0) && (t->m_end_block < rt->m_end_block) && (t->m_TEX0.TBW == rt->m_TEX0.TBW) if ((t->m_TEX0.TBP0 > rt->m_TEX0.TBP0) && (t->m_end_block < rt->m_end_block) && (t->m_TEX0.TBW == rt->m_TEX0.TBW) && (t->m_TEX0.TBP0 < t->m_end_block))
&& (t->m_TEX0.TBP0 < t->m_end_block)) { {
GL_INS("InvalidateVideoMemSubTarget: rt 0x%x -> 0x%x, sub rt 0x%x -> 0x%x", GL_INS("InvalidateVideoMemSubTarget: rt 0x%x -> 0x%x, sub rt 0x%x -> 0x%x",
rt->m_TEX0.TBP0, rt->m_end_block, t->m_TEX0.TBP0, t->m_end_block); rt->m_TEX0.TBP0, rt->m_end_block, t->m_TEX0.TBP0, t->m_end_block);
i = list.erase(i); i = list.erase(i);
delete t; delete t;
} else { }
else
{
++i; ++i;
} }
} }
@ -1140,14 +1223,18 @@ void GSTextureCache::IncAge()
{ {
Source* s = *i; Source* s = *i;
if(s->m_shared_texture) { if (s->m_shared_texture)
{
// Shared textures are temporary only added in the hash set but not in the texture // Shared textures are temporary only added in the hash set but not in the texture
// cache list therefore you can't use RemoveAt // cache list therefore you can't use RemoveAt
i = m_src.m_surfaces.erase(i); i = m_src.m_surfaces.erase(i);
delete s; delete s;
} else { }
else
{
++i; ++i;
if (++s->m_age > maxage) { if (++s->m_age > maxage)
{
m_src.RemoveAt(s); m_src.RemoveAt(s);
} }
} }
@ -1172,7 +1259,8 @@ void GSTextureCache::IncAge()
// probability that game will do it on the current RT. // probability that game will do it on the current RT.
// Variable is cleared here to avoid issue with game that uses a 16 bits // Variable is cleared here to avoid issue with game that uses a 16 bits
// render target // render target
if (t->m_age > 0) { if (t->m_age > 0)
{
// GoW2 uses the effect at the start of the frame // GoW2 uses the effect at the start of the frame
t->m_32_bits_fmt = false; t->m_32_bits_fmt = false;
} }
@ -1185,7 +1273,9 @@ void GSTextureCache::IncAge()
t->m_TEX0.TBP0); t->m_TEX0.TBP0);
delete t; delete t;
} else { }
else
{
++i; ++i;
} }
} }
@ -1226,7 +1316,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_texture->SetScale(scale); src->m_texture->SetScale(scale);
src->m_end_block = dst->m_end_block; src->m_end_block = dst->m_end_block;
if (psm.pal > 0) { if (psm.pal > 0)
{
// Attach palette for GPU texture conversion // Attach palette for GPU texture conversion
AttachPaletteToSource(src, psm.pal, true); AttachPaletteToSource(src, psm.pal, true);
} }
@ -1265,18 +1356,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY; int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY;
bool is_8bits = TEX0.PSM == PSM_PSMT8; bool is_8bits = TEX0.PSM == PSM_PSMT8;
if (is_8bits) { if (is_8bits)
{
GL_INS("Reading RT as a packed-indexed 8 bits format"); GL_INS("Reading RT as a packed-indexed 8 bits format");
shader = ShaderConvert_RGBA_TO_8I; shader = ShaderConvert_RGBA_TO_8I;
} }
#ifdef ENABLE_OGL_DEBUG #ifdef ENABLE_OGL_DEBUG
if (TEX0.PSM == PSM_PSMT4) { if (TEX0.PSM == PSM_PSMT4)
{
GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported"); GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported");
} }
#endif #endif
if (GSLocalMemory::m_psm[TEX0.PSM].bpp > 8) { if (GSLocalMemory::m_psm[TEX0.PSM].bpp > 8)
{
src->m_32_bits_fmt = dst->m_32_bits_fmt; src->m_32_bits_fmt = dst->m_32_bits_fmt;
} }
@ -1293,7 +1387,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
int w = (int)(dst->m_texture->GetScale().x * tw); int w = (int)(dst->m_texture->GetScale().x * tw);
int h = (int)(dst->m_texture->GetScale().y * th); int h = (int)(dst->m_texture->GetScale().y * th);
if (is_8bits) { if (is_8bits)
{
// Unscale 8 bits textures, quality won't be nice but format is really awful // Unscale 8 bits textures, quality won't be nice but format is really awful
w = tw; w = tw;
h = th; h = th;
@ -1388,7 +1483,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// 2/ It doesn't support rescaling of the RT (tw = 1024) // 2/ It doesn't support rescaling of the RT (tw = 1024)
// Maybe it will be more easy to just round the UV value in the Vertex Shader // Maybe it will be more easy to just round the UV value in the Vertex Shader
if (!is_8bits) { if (!is_8bits)
{
// 8 bits handling is special due to unscaling. It is better to not execute this code // 8 bits handling is special due to unscaling. It is better to not execute this code
if (w > dstsize.x) if (w > dstsize.x)
{ {
@ -1416,7 +1512,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format // GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format
// However it is different here. We want to reuse a Render Target as a texture. // However it is different here. We want to reuse a Render Target as a texture.
// Because the texture is already on the GPU, CPU can't convert it. // Because the texture is already on the GPU, CPU can't convert it.
if (psm.pal > 0) { if (psm.pal > 0)
{
AttachPaletteToSource(src, psm.pal, true); AttachPaletteToSource(src, psm.pal, true);
} }
// Disable linear filtering for various GS post-processing effect // Disable linear filtering for various GS post-processing effect
@ -1447,13 +1544,16 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
if ((sRect == dRect).alltrue() && !shader) if ((sRect == dRect).alltrue() && !shader)
{ {
if (half_right) { if (half_right)
{
// You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT // You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT
// which is arbitrary set to 1280 (biggest RT used by GS). h/w are based on the input texture // which is arbitrary set to 1280 (biggest RT used by GS). h/w are based on the input texture
// so the only reliable way to find the real size of the target is to use the TBW value. // so the only reliable way to find the real size of the target is to use the TBW value.
float real_width = dst->m_TEX0.TBW * 64u * dst->m_texture->GetScale().x; float real_width = dst->m_TEX0.TBW * 64u * dst->m_texture->GetScale().x;
m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i((int)(real_width / 2.0f), 0, (int)real_width, h)); m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i((int)(real_width / 2.0f), 0, (int)real_width, h));
} else { }
else
{
m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i(0, 0, w, h)); // <= likely wrong dstsize.x could be bigger than w m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i(0, 0, w, h)); // <= likely wrong dstsize.x could be bigger than w
} }
} }
@ -1463,7 +1563,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
sRect.z /= sTex->GetWidth(); sRect.z /= sTex->GetWidth();
sRect.w /= sTex->GetHeight(); sRect.w /= sTex->GetHeight();
if (half_right) { if (half_right)
{
sRect.x = sRect.z / 2.0f; sRect.x = sRect.z / 2.0f;
} }
@ -1513,9 +1614,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_texture = m_renderer->m_dev->CreateTexture(tw, th, Get8bitFormat()); src->m_texture = m_renderer->m_dev->CreateTexture(tw, th, Get8bitFormat());
AttachPaletteToSource(src, psm.pal, true); AttachPaletteToSource(src, psm.pal, true);
} }
else { else
{
src->m_texture = m_renderer->m_dev->CreateTexture(tw, th); src->m_texture = m_renderer->m_dev->CreateTexture(tw, th);
if (psm.pal > 0) { if (psm.pal > 0)
{
AttachPaletteToSource(src, psm.pal, false); AttachPaletteToSource(src, psm.pal, false);
} }
} }
@ -1561,19 +1664,23 @@ void GSTextureCache::PrintMemoryUsage()
uint32 tex_rt = 0; uint32 tex_rt = 0;
uint32 rt = 0; uint32 rt = 0;
uint32 dss = 0; uint32 dss = 0;
for(auto s : m_src.m_surfaces) { for (auto s : m_src.m_surfaces)
if(s && !s->m_shared_texture) { {
if (s && !s->m_shared_texture)
{
if (s->m_target) if (s->m_target)
tex_rt += s->m_texture->GetMemUsage(); tex_rt += s->m_texture->GetMemUsage();
else else
tex += s->m_texture->GetMemUsage(); tex += s->m_texture->GetMemUsage();
} }
} }
for(auto t : m_dst[RenderTarget]) { for (auto t : m_dst[RenderTarget])
{
if (t) if (t)
rt += t->m_texture->GetMemUsage(); rt += t->m_texture->GetMemUsage();
} }
for(auto t : m_dst[DepthStencil]) { for (auto t : m_dst[DepthStencil])
{
if (t) if (t)
dss += t->m_texture->GetMemUsage(); dss += t->m_texture->GetMemUsage();
} }
@ -1640,15 +1747,17 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
m_TEX0 = TEX0; m_TEX0 = TEX0;
m_TEXA = TEXA; m_TEXA = TEXA;
if (dummy_container) { if (dummy_container)
{
// Dummy container only contain a m_texture that is a pointer to another source. // Dummy container only contain a m_texture that is a pointer to another source.
m_write.rect = NULL; m_write.rect = NULL;
m_write.count = 0; m_write.count = 0;
m_repeating = false; m_repeating = false;
}
} else { else
{
memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0)); memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
memset(m_valid, 0, sizeof(m_valid)); memset(m_valid, 0, sizeof(m_valid));
@ -1887,7 +1996,8 @@ void GSTextureCache::Source::Flush(uint32 count, int layer)
m_write.count -= count; m_write.count -= count;
} }
bool GSTextureCache::Source::ClutMatch(PaletteKey palette_key) { bool GSTextureCache::Source::ClutMatch(PaletteKey palette_key)
{
return PaletteKeyEqual()(palette_key, m_palette_obj->GetPaletteKey()); return PaletteKeyEqual()(palette_key, m_palette_obj->GetPaletteKey());
} }
@ -1927,15 +2037,19 @@ void GSTextureCache::Target::Update()
GSVector4i r = m_dirty.GetDirtyRectAndClear(m_TEX0, t_size); GSVector4i r = m_dirty.GetDirtyRectAndClear(m_TEX0, t_size);
if (r.rempty()) return; if (r.rempty())
return;
// No handling please // No handling please
if ((m_type == DepthStencil) && !m_depth_supported) { if ((m_type == DepthStencil) && !m_depth_supported)
{
// do the most likely thing a direct write would do, clear it // do the most likely thing a direct write would do, clear it
GL_INS("ERROR: Update DepthStencil dummy"); GL_INS("ERROR: Update DepthStencil dummy");
return; return;
} else if (m_type == DepthStencil && m_renderer->m_game.title == CRC::FFX2) { }
else if (m_type == DepthStencil && m_renderer->m_game.title == CRC::FFX2)
{
GL_INS("ERROR: bad invalidation detected, depth buffer will be cleared"); GL_INS("ERROR: bad invalidation detected, depth buffer will be cleared");
// FFX2 menu. Invalidation of the depth is wrongly done and only the first // FFX2 menu. Invalidation of the depth is wrongly done and only the first
// page is invalidated. Technically a CRC hack will be better but I don't expect // page is invalidated. Technically a CRC hack will be better but I don't expect
@ -2052,7 +2166,8 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
void GSTextureCache::SourceMap::RemoveAll() void GSTextureCache::SourceMap::RemoveAll()
{ {
for (auto s : m_surfaces) delete s; for (auto s : m_surfaces)
delete s;
m_surfaces.clear(); m_surfaces.clear();
@ -2118,26 +2233,32 @@ GSTextureCache::Palette::Palette(const GSRenderer* renderer, uint16 pal, bool ne
uint16 palette_size = pal * sizeof(uint32); uint16 palette_size = pal * sizeof(uint32);
m_clut = (uint32*)_aligned_malloc(palette_size, 64); m_clut = (uint32*)_aligned_malloc(palette_size, 64);
memcpy(m_clut, (const uint32*)m_renderer->m_mem.m_clut, palette_size); memcpy(m_clut, (const uint32*)m_renderer->m_mem.m_clut, palette_size);
if (need_gs_texture) { if (need_gs_texture)
{
InitializeTexture(); InitializeTexture();
} }
} }
GSTextureCache::Palette::~Palette() { GSTextureCache::Palette::~Palette()
{
m_renderer->m_dev->Recycle(m_tex_palette); m_renderer->m_dev->Recycle(m_tex_palette);
_aligned_free(m_clut); _aligned_free(m_clut);
} }
GSTexture* GSTextureCache::Palette::GetPaletteGSTexture() { GSTexture* GSTextureCache::Palette::GetPaletteGSTexture()
{
return m_tex_palette; return m_tex_palette;
} }
GSTextureCache::PaletteKey GSTextureCache::Palette::GetPaletteKey() { GSTextureCache::PaletteKey GSTextureCache::Palette::GetPaletteKey()
{
return {m_clut, m_pal}; return {m_clut, m_pal};
} }
void GSTextureCache::Palette::InitializeTexture() { void GSTextureCache::Palette::InitializeTexture()
if (!m_tex_palette) { {
if (!m_tex_palette)
{
// A palette texture is always created with dimensions 256x1 (also in the case that m_pal is 16, thus a 16x1 texture // A palette texture is always created with dimensions 256x1 (also in the case that m_pal is 16, thus a 16x1 texture
// would be enough to store the CLUT data) because the coordinates that the shader uses for // would be enough to store the CLUT data) because the coordinates that the shader uses for
// sampling such texture are always normalized by 255. // sampling such texture are always normalized by 255.
@ -2157,14 +2278,16 @@ void GSTextureCache::Palette::InitializeTexture() {
// it is computed in 16 passes, // it is computed in 16 passes,
// 2) The clut can contain many 0s, so as a way to increase the spread of hashing values for small changes in the input clut the hashing function // 2) The clut can contain many 0s, so as a way to increase the spread of hashing values for small changes in the input clut the hashing function
// is using addition in combination with logical XOR operator; The addition constants are large prime numbers, which may help in achieving what intended. // is using addition in combination with logical XOR operator; The addition constants are large prime numbers, which may help in achieving what intended.
std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey &key) const { std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey& key) const
{
uint16 pal = key.pal; uint16 pal = key.pal;
const uint32* clut = key.clut; const uint32* clut = key.clut;
ASSERT((pal & 15) == 0); ASSERT((pal & 15) == 0);
size_t clut_hash = 3831179159; size_t clut_hash = 3831179159;
for (uint16 i = 0; i < pal; i += 16) { for (uint16 i = 0; i < pal; i += 16)
{
clut_hash = (clut_hash + 1488000301) ^ (clut[i ] + 33644011); clut_hash = (clut_hash + 1488000301) ^ (clut[i ] + 33644011);
clut_hash = (clut_hash + 3831179159) ^ (clut[i + 1] + 47627467); clut_hash = (clut_hash + 3831179159) ^ (clut[i + 1] + 47627467);
clut_hash = (clut_hash + 3659574209) ^ (clut[i + 2] + 577038523); clut_hash = (clut_hash + 3659574209) ^ (clut[i + 2] + 577038523);
@ -2190,8 +2313,10 @@ std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey &key) co
// GSTextureCache::PaletteKeyEqual // GSTextureCache::PaletteKeyEqual
bool GSTextureCache::PaletteKeyEqual::operator()(const PaletteKey &lhs, const PaletteKey &rhs) const { bool GSTextureCache::PaletteKeyEqual::operator()(const PaletteKey& lhs, const PaletteKey& rhs) const
if (lhs.pal != rhs.pal) { {
if (lhs.pal != rhs.pal)
{
return false; return false;
} }
@ -2203,12 +2328,14 @@ bool GSTextureCache::PaletteKeyEqual::operator()(const PaletteKey &lhs, const Pa
GSTextureCache::PaletteMap::PaletteMap(const GSRenderer* renderer) GSTextureCache::PaletteMap::PaletteMap(const GSRenderer* renderer)
: m_renderer(renderer) : m_renderer(renderer)
{ {
for (auto& map : m_maps) { for (auto& map : m_maps)
{
map.reserve(MAX_SIZE); map.reserve(MAX_SIZE);
} }
} }
std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalette(uint16 pal, bool need_gs_texture) { std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalette(uint16 pal, bool need_gs_texture)
{
ASSERT(pal == 16 || pal == 256); ASSERT(pal == 16 || pal == 256);
// Choose which hash map search into: // Choose which hash map search into:
@ -2223,9 +2350,11 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
auto it1 = map.find(palette_key); auto it1 = map.find(palette_key);
if (it1 != map.end()) { if (it1 != map.end())
{
// Clut content match, HIT // Clut content match, HIT
if (need_gs_texture && !it1->second->GetPaletteGSTexture()) { if (need_gs_texture && !it1->second->GetPaletteGSTexture())
{
// Generate GSTexture and upload clut content if needed and not done yet // Generate GSTexture and upload clut content if needed and not done yet
it1->second->InitializeTexture(); it1->second->InitializeTexture();
} }
@ -2234,31 +2363,37 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
// No palette with matching clut content, MISS // No palette with matching clut content, MISS
if (map.size() > MAX_SIZE) { if (map.size() > MAX_SIZE)
{
// If the map is too big, try to clean it by disposing and removing unused palettes, before adding the new one // If the map is too big, try to clean it by disposing and removing unused palettes, before adding the new one
GL_INS("WARNING, %u-bit PaletteMap (Size %u): Max size %u exceeded, clearing unused palettes.", pal * sizeof(uint32), map.size(), MAX_SIZE); GL_INS("WARNING, %u-bit PaletteMap (Size %u): Max size %u exceeded, clearing unused palettes.", pal * sizeof(uint32), map.size(), MAX_SIZE);
uint32 current_size = map.size(); uint32 current_size = map.size();
for (auto it = map.begin(); it != map.end(); ) { for (auto it = map.begin(); it != map.end();)
{
// If the palette is unused, there is only one shared pointers holding a reference to the unused Palette object, // If the palette is unused, there is only one shared pointers holding a reference to the unused Palette object,
// and this shared pointer is the one stored in the map itself // and this shared pointer is the one stored in the map itself
if (it->second.use_count() <= 1) { if (it->second.use_count() <= 1)
{
// Palette is unused // Palette is unused
it = map.erase(it); // Erase element from map it = map.erase(it); // Erase element from map
// The palette object should now be gone as the shared pointer to the object in the map is deleted // The palette object should now be gone as the shared pointer to the object in the map is deleted
} }
else { else
{
++it; ++it;
} }
} }
uint32 cleared_palette_count = current_size - (uint32)map.size(); uint32 cleared_palette_count = current_size - (uint32)map.size();
if (cleared_palette_count == 0) { if (cleared_palette_count == 0)
{
GL_INS("ERROR, %u-bit PaletteMap (Size %u): Max size %u exceeded, could not clear any palette, negative performance impact.", pal * sizeof(uint32), map.size(), MAX_SIZE); GL_INS("ERROR, %u-bit PaletteMap (Size %u): Max size %u exceeded, could not clear any palette, negative performance impact.", pal * sizeof(uint32), map.size(), MAX_SIZE);
} }
else { else
{
map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing
GL_INS("INFO, %u-bit PaletteMap (Size %u): Cleared %u palettes.", pal * sizeof(uint32), map.size(), cleared_palette_count); GL_INS("INFO, %u-bit PaletteMap (Size %u): Cleared %u palettes.", pal * sizeof(uint32), map.size(), cleared_palette_count);
} }
@ -2273,10 +2408,11 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
return palette; return palette;
} }
void GSTextureCache::PaletteMap::Clear() { void GSTextureCache::PaletteMap::Clear()
for (auto& map : m_maps) { {
for (auto& map : m_maps)
{
map.clear(); // Clear all the nodes of the map, deleting Palette objects managed by shared pointers as they should be unused elsewhere map.clear(); // Clear all the nodes of the map, deleting Palette objects managed by shared pointers as they should be unused elsewhere
map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing
} }
} }

View File

@ -28,7 +28,11 @@
class GSTextureCache class GSTextureCache
{ {
public: public:
enum {RenderTarget, DepthStencil}; enum
{
RenderTarget,
DepthStencil
};
class Surface : public GSAlignedClass<32> class Surface : public GSAlignedClass<32>
{ {
@ -54,7 +58,8 @@ public:
bool Overlaps(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect); bool Overlaps(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect);
}; };
struct PaletteKey { struct PaletteKey
{
const uint32* clut; const uint32* clut;
uint16 pal; uint16 pal;
}; };
@ -86,19 +91,25 @@ public:
void InitializeTexture(); void InitializeTexture();
}; };
struct PaletteKeyHash { struct PaletteKeyHash
{
// Calculate hash // Calculate hash
std::size_t operator()(const PaletteKey& key) const; std::size_t operator()(const PaletteKey& key) const;
}; };
struct PaletteKeyEqual { struct PaletteKeyEqual
{
// Compare pal value and clut contents // Compare pal value and clut contents
bool operator()(const PaletteKey& lhs, const PaletteKey& rhs) const; bool operator()(const PaletteKey& lhs, const PaletteKey& rhs) const;
}; };
class Source : public Surface class Source : public Surface
{ {
struct {GSVector4i* rect; uint32 count;} m_write; struct
{
GSVector4i* rect;
uint32 count;
} m_write;
void Write(const GSVector4i& r, int layer); void Write(const GSVector4i& r, int layer);
void Flush(uint32 count, int layer); void Flush(uint32 count, int layer);
@ -178,7 +189,11 @@ public:
uint32 m_pages[16]; // bitmap of all pages uint32 m_pages[16]; // bitmap of all pages
bool m_used; bool m_used;
SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));} SourceMap()
: m_used(false)
{
memset(m_pages, 0, sizeof(m_pages));
}
void Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off); void Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off);
void RemoveAll(); void RemoveAll();
@ -249,7 +264,8 @@ public:
bool ShallSearchTextureInsideRt(); bool ShallSearchTextureInsideRt();
const char* to_string(int type) { const char* to_string(int type)
{
return (type == DepthStencil) ? "Depth" : "Color"; return (type == DepthStencil) ? "Depth" : "Color";
} }

View File

@ -34,7 +34,12 @@ struct alignas(32) GSVertexHW9
// t.z = union {struct {uint8 r, g, b, a;}; uint32 c0;}; // t.z = union {struct {uint8 r, g, b, a;}; uint32 c0;};
// t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;} // t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;}
// Copy assignment: copies the t and p members from v and returns *this.
// The source is taken by const reference (it is never modified) so that const objects and
// temporaries can be assigned from as well; non-const lvalues keep working as before.
GSVertexHW9& operator=(const GSVertexHW9& v)
{
	t = v.t;
	p = v.p;
	return *this;
}
}; };
#pragma pack(pop) #pragma pack(pop)

View File

@ -41,4 +41,3 @@ GSTexture* GSDeviceNull::CreateSurface(int type, int w, int h, int format)
{ {
return new GSTextureNull(type, w, h, format); return new GSTextureNull(type, w, h, format);
} }

View File

@ -39,4 +39,3 @@ public:
bool Create(const std::shared_ptr<GSWnd>& wnd); bool Create(const std::shared_ptr<GSWnd>& wnd);
bool Reset(int w, int h); bool Reset(int w, int h);
}; };

View File

@ -28,7 +28,10 @@ class GSRendererNull : public GSRenderer
class GSVertexTraceNull : public GSVertexTrace class GSVertexTraceNull : public GSVertexTrace
{ {
public: public:
GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {} GSVertexTraceNull(const GSState* state)
: GSVertexTrace(state)
{
}
}; };
protected: protected:

View File

@ -25,7 +25,10 @@
class GSTextureNull : public GSTexture class GSTextureNull : public GSTexture
{ {
struct {int type, w, h, format;} m_desc; struct
{
int type, w, h, format;
} m_desc;
public: public:
GSTextureNull(); GSTextureNull();

View File

@ -121,12 +121,14 @@ extern PFNGLTEXTUREPAGECOMMITMENTEXTPROC glTexturePageCommitmentEXT;
#include "PFN_GLLOADER_HPP.h" #include "PFN_GLLOADER_HPP.h"
// Query and registration functions for OpenGL extension names (defined elsewhere).
namespace GLExtension
{
	// Returns a bool for the extension name ext.
	bool Has(const std::string& ext);

	// Stores the flag v (true by default) for the extension name ext.
	void Set(const std::string& ext, bool v = true);
} // namespace GLExtension
namespace GLLoader { namespace GLLoader
{
void check_gl_requirements(); void check_gl_requirements();
extern bool vendor_id_amd; extern bool vendor_id_amd;
@ -145,4 +147,4 @@ namespace GLLoader {
extern bool found_compatible_GL_ARB_sparse_texture2; extern bool found_compatible_GL_ARB_sparse_texture2;
extern bool found_compatible_sparse_depth; extern bool found_compatible_sparse_depth;
} } // namespace GLLoader

View File

@ -22,7 +22,8 @@
#include "stdafx.h" #include "stdafx.h"
#include "GLState.h" #include "GLState.h"
namespace GLState { namespace GLState
{
GLuint fbo; GLuint fbo;
GSVector2i viewport; GSVector2i viewport;
GSVector4i scissor; GSVector4i scissor;
@ -59,7 +60,8 @@ namespace GLState {
int64 available_vram; int64 available_vram;
void Clear() { void Clear()
{
fbo = 0; fbo = 0;
viewport = GSVector2i(0, 0); viewport = GSVector2i(0, 0);
scissor = GSVector4i(0, 0, 0, 0); scissor = GSVector4i(0, 0, 0, 0);
@ -100,4 +102,4 @@ namespace GLState {
// (256MB are reserved for PBO/IBO/VBO/UBO buffers) // (256MB are reserved for PBO/IBO/VBO/UBO buffers)
available_vram = (4096u - 256u) * 1024u * 1024u; available_vram = (4096u - 256u) * 1024u * 1024u;
} }
} } // namespace GLState

View File

@ -24,7 +24,8 @@
#include "GSdx.h" #include "GSdx.h"
#include "GSVector.h" #include "GSVector.h"
namespace GLState { namespace GLState
{
extern GLuint fbo; // frame buffer object extern GLuint fbo; // frame buffer object
extern GSVector2i viewport; extern GSVector2i viewport;
extern GSVector4i scissor; extern GSVector4i scissor;
@ -62,4 +63,4 @@ namespace GLState {
extern int64 available_vram; extern int64 available_vram;
extern void Clear(); extern void Clear();
} } // namespace GLState

View File

@ -96,7 +96,8 @@ GSDeviceOGL::GSDeviceOGL()
GSDeviceOGL::~GSDeviceOGL() GSDeviceOGL::~GSDeviceOGL()
{ {
if (m_debug_gl_file) { if (m_debug_gl_file)
{
fclose(m_debug_gl_file); fclose(m_debug_gl_file);
m_debug_gl_file = NULL; m_debug_gl_file = NULL;
} }
@ -143,7 +144,8 @@ GSDeviceOGL::~GSDeviceOGL()
glDeleteSamplers(countof(m_ps_ss), m_ps_ss); glDeleteSamplers(countof(m_ps_ss), m_ps_ss);
for (uint32 key = 0; key < countof(m_om_dss); key++) delete m_om_dss[key]; for (uint32 key = 0; key < countof(m_om_dss); key++)
delete m_om_dss[key];
PboPool::Destroy(); PboPool::Destroy();
@ -154,14 +156,16 @@ GSDeviceOGL::~GSDeviceOGL()
void GSDeviceOGL::GenerateProfilerData() void GSDeviceOGL::GenerateProfilerData()
{ {
if (m_profiler.last_query < 3) { if (m_profiler.last_query < 3)
{
glDeleteQueries(1 << 16, m_profiler.timer_query); glDeleteQueries(1 << 16, m_profiler.timer_query);
return; return;
} }
// Wait latest quey to get valid result // Wait latest quey to get valid result
GLuint available = 0; GLuint available = 0;
while (!available) { while (!available)
{
glGetQueryObjectuiv(m_profiler.timer(), GL_QUERY_RESULT_AVAILABLE, &available); glGetQueryObjectuiv(m_profiler.timer(), GL_QUERY_RESULT_AVAILABLE, &available);
} }
@ -174,7 +178,8 @@ void GSDeviceOGL::GenerateProfilerData()
int first_query = replay > 1 ? m_profiler.last_query / replay : 0; int first_query = replay > 1 ? m_profiler.last_query / replay : 0;
glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start); glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start);
for (uint32 q = first_query + 1; q < m_profiler.last_query; q++) { for (uint32 q = first_query + 1; q < m_profiler.last_query; q++)
{
glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end); glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end);
uint64 t = time_end - time_start; uint64 t = time_end - time_start;
times.push_back((double)t * ms); times.push_back((double)t * ms);
@ -193,16 +198,20 @@ void GSDeviceOGL::GenerateProfilerData()
auto minmax_time = std::minmax_element(times.begin(), times.end()); auto minmax_time = std::minmax_element(times.begin(), times.end());
for (auto t : times) mean += t; for (auto t : times)
mean += t;
mean = mean / frames; mean = mean / frames;
for (auto t : times) sd += pow(t-mean, 2); for (auto t : times)
sd += pow(t - mean, 2);
sd = sqrt(sd / frames); sd = sqrt(sd / frames);
uint32 time_repartition[16] = {0}; uint32 time_repartition[16] = {0};
for (auto t : times) { for (auto t : times)
{
uint32 slot = (uint32)(t / 2.0); uint32 slot = (uint32)(t / 2.0);
if (slot >= countof(time_repartition)) { if (slot >= countof(time_repartition))
{
slot = countof(time_repartition) - 1; slot = countof(time_repartition) - 1;
} }
time_repartition[slot]++; time_repartition[slot]++;
@ -215,13 +224,16 @@ void GSDeviceOGL::GenerateProfilerData()
fprintf(stderr, "SD %4.2f ms\n", sd); fprintf(stderr, "SD %4.2f ms\n", sd);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "Frame Repartition\n"); fprintf(stderr, "Frame Repartition\n");
for (uint32 i = 0; i < countof(time_repartition); i ++) { for (uint32 i = 0; i < countof(time_repartition); i++)
{
fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i + 1), time_repartition[i]); fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i + 1), time_repartition[i]);
} }
FILE* csv = fopen("GSdx_profile.csv", "w"); FILE* csv = fopen("GSdx_profile.csv", "w");
if (csv) { if (csv)
for (size_t i = 0; i < times.size(); i++) { {
for (size_t i = 0; i < times.size(); i++)
{
fprintf(csv, "%zu,%lf\n", i, times[i]); fprintf(csv, "%zu,%lf\n", i, times[i]);
} }
@ -239,7 +251,8 @@ GSTexture* GSDeviceOGL::CreateSurface(int type, int w, int h, int fmt)
// NOTE: I'm not sure RenderTarget always need to be cleared. It could be costly for big upscale. // NOTE: I'm not sure RenderTarget always need to be cleared. It could be costly for big upscale.
// FIXME: it will be more logical to do it in FetchSurface. This code is only called at first creation // FIXME: it will be more logical to do it in FetchSurface. This code is only called at first creation
// of the texture. However we could reuse a deleted texture. // of the texture. However we could reuse a deleted texture.
if (m_force_texture_clear == 0) { if (m_force_texture_clear == 0)
{
// Clear won't be done if the texture isn't committed. Commit the full texture to ensure // Clear won't be done if the texture isn't committed. Commit the full texture to ensure
// correct behavior of force clear option (debug option) // correct behavior of force clear option (debug option)
t->Commit(); t->Commit();
@ -267,7 +280,8 @@ GSTexture* GSDeviceOGL::FetchSurface(int type, int w, int h, int format)
GSTexture* t = GSDevice::FetchSurface(type, w, h, format); GSTexture* t = GSDevice::FetchSurface(type, w, h, format);
if (m_force_texture_clear) { if (m_force_texture_clear)
{
// Clear won't be done if the texture isn't committed. Commit the full texture to ensure // Clear won't be done if the texture isn't committed. Commit the full texture to ensure
// correct behavior of force clear option (debug option) // correct behavior of force clear option (debug option)
t->Commit(); t->Commit();
@ -302,7 +316,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
// Debug helper // Debug helper
// **************************************************************** // ****************************************************************
#ifdef ENABLE_OGL_DEBUG #ifdef ENABLE_OGL_DEBUG
if (theApp.GetConfigB("debug_opengl")) { if (theApp.GetConfigB("debug_opengl"))
{
glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL); glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
@ -340,7 +355,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
// Some timers to help profiling // Some timers to help profiling
if (GLLoader::in_replayer) { if (GLLoader::in_replayer)
{
glCreateQueries(GL_TIMESTAMP, 1 << 16, m_profiler.timer_query); glCreateQueries(GL_TIMESTAMP, 1 << 16, m_profiler.timer_query);
} }
} }
@ -371,7 +387,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
{ {
GL_PUSH("GSDeviceOGL::Sampler"); GL_PUSH("GSDeviceOGL::Sampler");
for (uint32 key = 0; key < countof(m_ps_ss); key++) { for (uint32 key = 0; key < countof(m_ps_ss); key++)
{
m_ps_ss[key] = CreateSampler(PSSamplerSelector(key)); m_ps_ss[key] = CreateSampler(PSSamplerSelector(key));
} }
} }
@ -396,7 +413,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, shader.data()); vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, shader.data());
m_convert.vs = vs; m_convert.vs = vs;
for(size_t i = 0; i < countof(m_convert.ps); i++) { for (size_t i = 0; i < countof(m_convert.ps); i++)
{
ps = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data()); ps = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data());
std::string pretty_name = "Convert pipe " + std::to_string(i); std::string pretty_name = "Convert pipe " + std::to_string(i);
m_convert.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps); m_convert.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
@ -425,7 +443,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
theApp.LoadResource(IDR_MERGE_GLSL, shader); theApp.LoadResource(IDR_MERGE_GLSL, shader);
for(size_t i = 0; i < countof(m_merge_obj.ps); i++) { for (size_t i = 0; i < countof(m_merge_obj.ps); i++)
{
ps = m_shader->Compile("merge.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data()); ps = m_shader->Compile("merge.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data());
std::string pretty_name = "Merge pipe " + std::to_string(i); std::string pretty_name = "Merge pipe " + std::to_string(i);
m_merge_obj.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps); m_merge_obj.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
@ -442,7 +461,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
theApp.LoadResource(IDR_INTERLACE_GLSL, shader); theApp.LoadResource(IDR_INTERLACE_GLSL, shader);
for(size_t i = 0; i < countof(m_interlace.ps); i++) { for (size_t i = 0; i < countof(m_interlace.ps); i++)
{
ps = m_shader->Compile("interlace.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data()); ps = m_shader->Compile("interlace.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data());
std::string pretty_name = "Interlace pipe " + std::to_string(i); std::string pretty_name = "Interlace pipe " + std::to_string(i);
m_interlace.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps); m_interlace.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
@ -533,14 +553,19 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
// Get Available Memory // Get Available Memory
// **************************************************************** // ****************************************************************
GLint vram[4] = {0}; GLint vram[4] = {0};
if (GLLoader::vendor_id_amd) { if (GLLoader::vendor_id_amd)
{
// Full vram, remove a small margin for others buffer // Full vram, remove a small margin for others buffer
glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, vram); glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, vram);
} else if (GLExtension::Has("GL_NVX_gpu_memory_info")) { }
else if (GLExtension::Has("GL_NVX_gpu_memory_info"))
{
// GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX <= give full memory // GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX <= give full memory
// Available vram // Available vram
glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, vram); glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, vram);
} else { }
else
{
fprintf(stdout, "No extenstion supported to get available memory. Use default value !\n"); fprintf(stdout, "No extenstion supported to get available memory. Use default value !\n");
} }
@ -558,8 +583,7 @@ bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
GSVector2i tex_font = m_osd.get_texture_font_size(); GSVector2i tex_font = m_osd.get_texture_font_size();
m_font = std::unique_ptr<GSTexture>( m_font = std::unique_ptr<GSTexture>(
new GSTextureOGL(GSTextureOGL::Texture, tex_font.x, tex_font.y, GL_R8, m_fbo_read, false) new GSTextureOGL(GSTextureOGL::Texture, tex_font.x, tex_font.y, GL_R8, m_fbo_read, false));
);
// **************************************************************** // ****************************************************************
// Finish window setup and backbuffer // Finish window setup and backbuffer
@ -608,7 +632,8 @@ void GSDeviceOGL::CreateTextureFX()
// enough but buffer is polluted with noise. Clear will be limited // enough but buffer is polluted with noise. Clear will be limited
// to the mask. // to the mask.
glStencilMask(0xFF); glStencilMask(0xFF);
for (uint32 key = 0; key < countof(m_om_dss); key++) { for (uint32 key = 0; key < countof(m_om_dss); key++)
{
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
} }
@ -638,7 +663,8 @@ void GSDeviceOGL::Flip()
{ {
m_wnd->Flip(); m_wnd->Flip();
if (GLLoader::in_replayer) { if (GLLoader::in_replayer)
{
glQueryCounter(m_profiler.timer(), GL_TIMESTAMP); glQueryCounter(m_profiler.timer(), GL_TIMESTAMP);
m_profiler.last_query++; m_profiler.last_query++;
} }
@ -686,7 +712,8 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
{ {
if (!t) return; if (!t)
return;
GSTextureOGL* T = static_cast<GSTextureOGL*>(t); GSTextureOGL* T = static_cast<GSTextureOGL*>(t);
if (T->HasBeenCleaned() && !T->IsBackbuffer()) if (T->HasBeenCleaned() && !T->IsBackbuffer())
@ -707,18 +734,20 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
uint32 old_color_mask = GLState::wrgba; uint32 old_color_mask = GLState::wrgba;
OMSetColorMaskState(); OMSetColorMaskState();
if (T->IsBackbuffer()) { if (T->IsBackbuffer())
{
OMSetFBO(0); OMSetFBO(0);
// glDrawBuffer(GL_BACK); // this is the default when there is no FB // glDrawBuffer(GL_BACK); // this is the default when there is no FB
// 0 will select the first drawbuffer ie GL_BACK // 0 will select the first drawbuffer ie GL_BACK
glClearBufferfv(GL_COLOR, 0, c.v); glClearBufferfv(GL_COLOR, 0, c.v);
} else { }
else
{
OMSetFBO(m_fbo); OMSetFBO(m_fbo);
OMAttachRt(T); OMAttachRt(T);
glClearBufferfv(GL_COLOR, 0, c.v); glClearBufferfv(GL_COLOR, 0, c.v);
} }
OMSetColorMaskState(OMColorMaskSelector(old_color_mask)); OMSetColorMaskState(OMColorMaskSelector(old_color_mask));
@ -730,7 +759,8 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c)
{ {
if (!t) return; if (!t)
return;
GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255); GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255);
ClearRenderTarget(t, color); ClearRenderTarget(t, color);
@ -738,13 +768,15 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c)
void GSDeviceOGL::ClearDepth(GSTexture* t) void GSDeviceOGL::ClearDepth(GSTexture* t)
{ {
if (!t) return; if (!t)
return;
GSTextureOGL* T = static_cast<GSTextureOGL*>(t); GSTextureOGL* T = static_cast<GSTextureOGL*>(t);
GL_PUSH("Clear Depth %d", T->GetID()); GL_PUSH("Clear Depth %d", T->GetID());
if (0 && GLLoader::found_GL_ARB_clear_texture) { if (0 && GLLoader::found_GL_ARB_clear_texture)
{
// I don't know what the driver does but it creates // I don't know what the driver does but it creates
// some slowdowns on Harry Potter PS // some slowdowns on Harry Potter PS
// Maybe it triggers some texture relocations, or maybe // Maybe it triggers some texture relocations, or maybe
@ -754,7 +786,9 @@ void GSDeviceOGL::ClearDepth(GSTexture* t)
// Don't bother with Depth_Stencil insanity // Don't bother with Depth_Stencil insanity
T->Clear(NULL); T->Clear(NULL);
} else { }
else
{
OMSetFBO(m_fbo); OMSetFBO(m_fbo);
// RT must be detached, if RT is too small, depth won't be fully cleared // RT must be detached, if RT is too small, depth won't be fully cleared
// AT tolenico 2 map clip bug // AT tolenico 2 map clip bug
@ -764,9 +798,12 @@ void GSDeviceOGL::ClearDepth(GSTexture* t)
// TODO: check size of scissor before toggling it // TODO: check size of scissor before toggling it
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
float c = 0.0f; float c = 0.0f;
if (GLState::depth_mask) { if (GLState::depth_mask)
{
glClearBufferfv(GL_DEPTH, 0, &c); glClearBufferfv(GL_DEPTH, 0, &c);
} else { }
else
{
glDepthMask(true); glDepthMask(true);
glClearBufferfv(GL_DEPTH, 0, &c); glClearBufferfv(GL_DEPTH, 0, &c);
glDepthMask(false); glDepthMask(false);
@ -777,7 +814,8 @@ void GSDeviceOGL::ClearDepth(GSTexture* t)
void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
{ {
if (!t) return; if (!t)
return;
GSTextureOGL* T = static_cast<GSTextureOGL*>(t); GSTextureOGL* T = static_cast<GSTextureOGL*>(t);
@ -800,15 +838,19 @@ GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel)
glCreateSamplers(1, &sampler); glCreateSamplers(1, &sampler);
// Bilinear filtering // Bilinear filtering
if (sel.biln) { if (sel.biln)
{
glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
} else { }
else
{
glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} }
switch (static_cast<GS_MIN_FILTER>(sel.triln)) { switch (static_cast<GS_MIN_FILTER>(sel.triln))
{
case GS_MIN_FILTER::Nearest: case GS_MIN_FILTER::Nearest:
// Nop based on biln // Nop based on biln
break; break;
@ -846,7 +888,8 @@ GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel)
glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
int anisotropy = theApp.GetConfigI("MaxAnisotropy"); int anisotropy = theApp.GetConfigI("MaxAnisotropy");
if (anisotropy && sel.aniso) { if (anisotropy && sel.aniso)
{
if (GLExtension::Has("GL_ARB_texture_filter_anisotropic")) if (GLExtension::Has("GL_ARB_texture_filter_anisotropic"))
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, (float)anisotropy); glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, (float)anisotropy);
else if (GLExtension::Has("GL_EXT_texture_filter_anisotropic")) else if (GLExtension::Has("GL_EXT_texture_filter_anisotropic"))
@ -911,7 +954,8 @@ void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt, const GSVector4i& area)
void GSDeviceOGL::RecycleDateTexture() void GSDeviceOGL::RecycleDateTexture()
{ {
if (m_date.t) { if (m_date.t)
{
//static_cast<GSTextureOGL*>(m_date.t)->Save(format("/tmp/date_adv_%04ld.csv", GSState::s_n)); //static_cast<GSTextureOGL*>(m_date.t)->Save(format("/tmp/date_adv_%04ld.csv", GSState::s_n));
Recycle(m_date.t); Recycle(m_date.t);
@ -1009,7 +1053,8 @@ void GSDeviceOGL::SelfShaderTestRun(const std::string& dir, const std::string& f
#ifdef __linux__ #ifdef __linux__
// Nouveau actually // Nouveau actually
if (GLLoader::mesa_driver) { if (GLLoader::mesa_driver)
{
if (freopen(out.c_str(), "w", stderr) == NULL) if (freopen(out.c_str(), "w", stderr) == NULL)
fprintf(stderr, "Failed to redirect stderr\n"); fprintf(stderr, "Failed to redirect stderr\n");
} }
@ -1021,7 +1066,8 @@ void GSDeviceOGL::SelfShaderTestRun(const std::string& dir, const std::string& f
#ifdef __linux__ #ifdef __linux__
// Nouveau actually // Nouveau actually
if (GLLoader::mesa_driver) { if (GLLoader::mesa_driver)
{
if (freopen("/dev/tty", "w", stderr) == NULL) if (freopen("/dev/tty", "w", stderr) == NULL)
fprintf(stderr, "Failed to restore stderr\n"); fprintf(stderr, "Failed to restore stderr\n");
} }
@ -1054,15 +1100,18 @@ void GSDeviceOGL::SelfShaderTest()
int nb_shader = 0; int nb_shader = 0;
test = "SW_Blending"; test = "SW_Blending";
for (int colclip = 0; colclip < 2; colclip++) { for (int colclip = 0; colclip < 2; colclip++)
for (int fmt = 0; fmt < 3; fmt++) { {
for (int i = 0; i < 3; i++) { for (int fmt = 0; fmt < 3; fmt++)
{
for (int i = 0; i < 3; i++)
{
PSSelector sel; PSSelector sel;
sel.tfx = 4; sel.tfx = 4;
int ib = (i + 1) % 3; int ib = (i + 1) % 3;
sel.blend_a = i; sel.blend_a = i;
sel.blend_b = ib;; sel.blend_b = ib;
sel.blend_c = i; sel.blend_c = i;
sel.blend_d = i; sel.blend_d = i;
sel.colclip = colclip; sel.colclip = colclip;
@ -1077,7 +1126,8 @@ void GSDeviceOGL::SelfShaderTest()
SelfShaderTestPrint(test, nb_shader); SelfShaderTestPrint(test, nb_shader);
test = "Alpha_Test"; test = "Alpha_Test";
for (int atst = 0; atst < 5; atst++) { for (int atst = 0; atst < 5; atst++)
{
PSSelector sel; PSSelector sel;
sel.tfx = 4; sel.tfx = 4;
@ -1088,7 +1138,8 @@ void GSDeviceOGL::SelfShaderTest()
SelfShaderTestPrint(test, nb_shader); SelfShaderTestPrint(test, nb_shader);
test = "Fbmask__Fog__Shuffle__Read_ba"; test = "Fbmask__Fog__Shuffle__Read_ba";
for (int read_ba = 0; read_ba < 2; read_ba++) { for (int read_ba = 0; read_ba < 2; read_ba++)
{
PSSelector sel; PSSelector sel;
sel.tfx = 4; sel.tfx = 4;
@ -1103,7 +1154,8 @@ void GSDeviceOGL::SelfShaderTest()
SelfShaderTestPrint(test, nb_shader); SelfShaderTestPrint(test, nb_shader);
test = "Date"; test = "Date";
for (int date = 1; date < 7; date++) { for (int date = 1; date < 7; date++)
{
PSSelector sel; PSSelector sel;
sel.tfx = 4; sel.tfx = 4;
@ -1114,7 +1166,8 @@ void GSDeviceOGL::SelfShaderTest()
SelfShaderTestPrint(test, nb_shader); SelfShaderTestPrint(test, nb_shader);
test = "FBA"; test = "FBA";
for (int fmt = 0; fmt < 3; fmt++) { for (int fmt = 0; fmt < 3; fmt++)
{
PSSelector sel; PSSelector sel;
sel.tfx = 4; sel.tfx = 4;
@ -1141,9 +1194,12 @@ void GSDeviceOGL::SelfShaderTest()
SelfShaderTestPrint(test, nb_shader); SelfShaderTestPrint(test, nb_shader);
test = "Tfx__Tcc"; test = "Tfx__Tcc";
for (int channel = 0; channel < 5; channel++) { for (int channel = 0; channel < 5; channel++)
for (int tfx = 0; tfx < 5; tfx++) { {
for (int tcc = 0; tcc < 2; tcc++) { for (int tfx = 0; tfx < 5; tfx++)
{
for (int tcc = 0; tcc < 2; tcc++)
{
PSSelector sel; PSSelector sel;
sel.fst = 1; sel.fst = 1;
@ -1158,14 +1214,21 @@ void GSDeviceOGL::SelfShaderTest()
SelfShaderTestPrint(test, nb_shader); SelfShaderTestPrint(test, nb_shader);
test = "Texture_Sampling"; test = "Texture_Sampling";
for (int depth = 0; depth < 4; depth++) { for (int depth = 0; depth < 4; depth++)
for (int fmt = 0; fmt < 16; fmt++) { {
if ((fmt & 3) == 3) continue; for (int fmt = 0; fmt < 16; fmt++)
{
if ((fmt & 3) == 3)
continue;
for (int ltf = 0; ltf < 2; ltf++) { for (int ltf = 0; ltf < 2; ltf++)
for (int aem = 0; aem < 2; aem++) { {
for (int wms = 1; wms < 4; wms++) { for (int aem = 0; aem < 2; aem++)
for (int wmt = 1; wmt < 4; wmt++) { {
for (int wms = 1; wms < 4; wms++)
{
for (int wmt = 1; wmt < 4; wmt++)
{
PSSelector sel; PSSelector sel;
sel.tfx = 1; sel.tfx = 1;
sel.tcc = 1; sel.tcc = 1;
@ -1352,7 +1415,8 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
// 2/ in case some GSdx code expect thing in dx order. // 2/ in case some GSdx code expect thing in dx order.
// Only flipping the backbuffer is transparent (I hope)... // Only flipping the backbuffer is transparent (I hope)...
GSVector4 flip_sr = sRect; GSVector4 flip_sr = sRect;
if (static_cast<GSTextureOGL*>(dTex)->IsBackbuffer()) { if (static_cast<GSTextureOGL*>(dTex)->IsBackbuffer())
{
flip_sr.y = sRect.w; flip_sr.y = sRect.w;
flip_sr.w = sRect.y; flip_sr.w = sRect.y;
} }
@ -1398,7 +1462,8 @@ void GSDeviceOGL::RenderOsd(GSTexture* dt)
OMSetBlendState((uint8)GSDeviceOGL::m_MERGE_BLEND); OMSetBlendState((uint8)GSDeviceOGL::m_MERGE_BLEND);
OMSetRenderTargets(dt, NULL); OMSetRenderTargets(dt, NULL);
if(m_osd.m_texture_dirty) { if (m_osd.m_texture_dirty)
{
m_osd.upload_texture_atlas(m_font.get()); m_osd.upload_texture_atlas(m_font.get());
} }
@ -1434,14 +1499,16 @@ void GSDeviceOGL::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex,
ClearRenderTarget(dTex, c); ClearRenderTarget(dTex, c);
// Upload constant to select YUV algo // Upload constant to select YUV algo
if (feedback_write_2 || feedback_write_1) { if (feedback_write_2 || feedback_write_1)
{
// Write result to feedback loop // Write result to feedback loop
m_misc_cb_cache.EMOD_AC.x = EXTBUF.EMODA; m_misc_cb_cache.EMOD_AC.x = EXTBUF.EMODA;
m_misc_cb_cache.EMOD_AC.y = EXTBUF.EMODC; m_misc_cb_cache.EMOD_AC.y = EXTBUF.EMODC;
m_convert.cb->cache_upload(&m_misc_cb_cache); m_convert.cb->cache_upload(&m_misc_cb_cache);
} }
if (sTex[1] && (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg)) { if (sTex[1] && (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg))
{
// 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output // 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output
// Note: value outside of dRect must contains the background color (c) // Note: value outside of dRect must contains the background color (c)
StretchRect(sTex[1], sRect[1], dTex, dRect[1], ShaderConvert_COPY); StretchRect(sTex[1], sRect[1], dTex, dRect[1], ShaderConvert_COPY);
@ -1455,16 +1522,20 @@ void GSDeviceOGL::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex,
if (feedback_write_2_but_blend_bg) if (feedback_write_2_but_blend_bg)
ClearRenderTarget(dTex, c); ClearRenderTarget(dTex, c);
if (sTex[0]) { if (sTex[0])
{
if (PMODE.AMOD == 1) // Keep the alpha from the 2nd output if (PMODE.AMOD == 1) // Keep the alpha from the 2nd output
OMSetColorMaskState(OMColorMaskSelector(0x7)); OMSetColorMaskState(OMColorMaskSelector(0x7));
// 1st output is enabled. It must be blended // 1st output is enabled. It must be blended
if (PMODE.MMOD == 1) { if (PMODE.MMOD == 1)
{
// Blend with a constant alpha // Blend with a constant alpha
m_merge_obj.cb->cache_upload(&c.v); m_merge_obj.cb->cache_upload(&c.v);
StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[1], m_MERGE_BLEND, OMColorMaskSelector()); StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[1], m_MERGE_BLEND, OMColorMaskSelector());
} else { }
else
{
// Blend with 2 * input alpha // Blend with 2 * input alpha
StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[0], m_MERGE_BLEND, OMColorMaskSelector()); StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[0], m_MERGE_BLEND, OMColorMaskSelector());
} }
@ -1498,8 +1569,10 @@ void GSDeviceOGL::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool
void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex) void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex)
{ {
// Lazy compile // Lazy compile
if (!m_fxaa.ps) { if (!m_fxaa.ps)
if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension {
if (!GLLoader::found_GL_ARB_gpu_shader5) // GL4.0 extension
{
return; return;
} }
@ -1528,8 +1601,10 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex)
void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
{ {
// Lazy compile // Lazy compile
if (!m_shaderfx.ps) { if (!m_shaderfx.ps)
if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension {
if (!GLLoader::found_GL_ARB_gpu_shader5) // GL4.0 extension
{
return; return;
} }
@ -1545,7 +1620,8 @@ void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
std::string shader_name(theApp.GetConfigS("shaderfx_glsl")); std::string shader_name(theApp.GetConfigS("shaderfx_glsl"));
std::ifstream fshader(shader_name); std::ifstream fshader(shader_name);
std::stringstream shader; std::stringstream shader;
if (!fshader.good()) { if (!fshader.good())
{
fprintf(stderr, "Error failed to load '%s'. External Shader will be disabled !\n", shader_name.c_str()); fprintf(stderr, "Error failed to load '%s'. External Shader will be disabled !\n", shader_name.c_str());
return; return;
} }
@ -1606,7 +1682,8 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
// om // om
OMSetDepthStencilState(m_date.dss); OMSetDepthStencilState(m_date.dss);
if (GLState::blend) { if (GLState::blend)
{
glDisable(GL_BLEND); glDisable(GL_BLEND);
} }
OMSetRenderTargets(NULL, ds, &GLState::scissor); OMSetRenderTargets(NULL, ds, &GLState::scissor);
@ -1624,7 +1701,8 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
DrawPrimitive(); DrawPrimitive();
if (GLState::blend) { if (GLState::blend)
{
glEnable(GL_BLEND); glEnable(GL_BLEND);
} }
@ -1655,9 +1733,11 @@ void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr)
{ {
ASSERT(i < (int)countof(GLState::tex_unit)); ASSERT(i < (int)countof(GLState::tex_unit));
// Note: Nvidia debgger doesn't support the id 0 (ie the NULL texture) // Note: Nvidia debgger doesn't support the id 0 (ie the NULL texture)
if (sr) { if (sr)
{
GLuint id = static_cast<GSTextureOGL*>(sr)->GetID(); GLuint id = static_cast<GSTextureOGL*>(sr)->GetID();
if (GLState::tex_unit[i] != id) { if (GLState::tex_unit[i] != id)
{
GLState::tex_unit[i] = id; GLState::tex_unit[i] = id;
glBindTextureUnit(i, id); glBindTextureUnit(i, id);
} }
@ -1672,7 +1752,8 @@ void GSDeviceOGL::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
void GSDeviceOGL::PSSetSamplerState(GLuint ss) void GSDeviceOGL::PSSetSamplerState(GLuint ss)
{ {
if (GLState::ps_ss != ss) { if (GLState::ps_ss != ss)
{
GLState::ps_ss = ss; GLState::ps_ss = ss;
glBindSampler(0, ss); glBindSampler(0, ss);
} }
@ -1681,14 +1762,18 @@ void GSDeviceOGL::PSSetSamplerState(GLuint ss)
void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt) void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt)
{ {
GLuint id; GLuint id;
if (rt) { if (rt)
{
rt->WasAttached(); rt->WasAttached();
id = rt->GetID(); id = rt->GetID();
} else { }
else
{
id = 0; id = 0;
} }
if (GLState::rt != id) { if (GLState::rt != id)
{
GLState::rt = id; GLState::rt = id;
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, id, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, id, 0);
} }
@ -1697,14 +1782,18 @@ void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt)
void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds) void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds)
{ {
GLuint id; GLuint id;
if (ds) { if (ds)
{
ds->WasAttached(); ds->WasAttached();
id = ds->GetID(); id = ds->GetID();
} else { }
else
{
id = 0; id = 0;
} }
if (GLState::ds != id) { if (GLState::ds != id)
{
GLState::ds = id; GLState::ds = id;
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0);
} }
@ -1712,7 +1801,8 @@ void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds)
void GSDeviceOGL::OMSetFBO(GLuint fbo) void GSDeviceOGL::OMSetFBO(GLuint fbo)
{ {
if (GLState::fbo != fbo) { if (GLState::fbo != fbo)
{
GLState::fbo = fbo; GLState::fbo = fbo;
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
} }
@ -1726,7 +1816,8 @@ void GSDeviceOGL::OMSetDepthStencilState(GSDepthStencilOGL* dss)
void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel)
{ {
if (sel.wrgba != GLState::wrgba) { if (sel.wrgba != GLState::wrgba)
{
GLState::wrgba = sel.wrgba; GLState::wrgba = sel.wrgba;
glColorMaski(0, sel.wr, sel.wg, sel.wb, sel.wa); glColorMaski(0, sel.wr, sel.wg, sel.wb, sel.wa);
@ -1735,37 +1826,45 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel)
void GSDeviceOGL::OMSetBlendState(uint8 blend_index, uint8 blend_factor, bool is_blend_constant, bool accumulation_blend) void GSDeviceOGL::OMSetBlendState(uint8 blend_index, uint8 blend_factor, bool is_blend_constant, bool accumulation_blend)
{ {
if (blend_index) { if (blend_index)
if (!GLState::blend) { {
if (!GLState::blend)
{
GLState::blend = true; GLState::blend = true;
glEnable(GL_BLEND); glEnable(GL_BLEND);
} }
if (is_blend_constant && GLState::bf != blend_factor) { if (is_blend_constant && GLState::bf != blend_factor)
{
GLState::bf = blend_factor; GLState::bf = blend_factor;
float bf = (float)blend_factor / 128.0f; float bf = (float)blend_factor / 128.0f;
glBlendColor(bf, bf, bf, bf); glBlendColor(bf, bf, bf, bf);
} }
HWBlend b = GetBlend(blend_index); HWBlend b = GetBlend(blend_index);
if (accumulation_blend) { if (accumulation_blend)
{
b.src = GL_ONE; b.src = GL_ONE;
b.dst = GL_ONE; b.dst = GL_ONE;
} }
if (GLState::eq_RGB != b.op) { if (GLState::eq_RGB != b.op)
{
GLState::eq_RGB = b.op; GLState::eq_RGB = b.op;
glBlendEquationSeparate(b.op, GL_FUNC_ADD); glBlendEquationSeparate(b.op, GL_FUNC_ADD);
} }
if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst) { if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst)
{
GLState::f_sRGB = b.src; GLState::f_sRGB = b.src;
GLState::f_dRGB = b.dst; GLState::f_dRGB = b.dst;
glBlendFuncSeparate(b.src, b.dst, GL_ONE, GL_ZERO); glBlendFuncSeparate(b.src, b.dst, GL_ONE, GL_ZERO);
} }
}
} else { else
if (GLState::blend) { {
if (GLState::blend)
{
GLState::blend = false; GLState::blend = false;
glDisable(GL_BLEND); glDisable(GL_BLEND);
} }
@ -1777,11 +1876,15 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
GSTextureOGL* RT = static_cast<GSTextureOGL*>(rt); GSTextureOGL* RT = static_cast<GSTextureOGL*>(rt);
GSTextureOGL* DS = static_cast<GSTextureOGL*>(ds); GSTextureOGL* DS = static_cast<GSTextureOGL*>(ds);
if (rt == NULL || !RT->IsBackbuffer()) { if (rt == NULL || !RT->IsBackbuffer())
{
OMSetFBO(m_fbo); OMSetFBO(m_fbo);
if (rt) { if (rt)
{
OMAttachRt(RT); OMAttachRt(RT);
} else { }
else
{
OMAttachRt(); OMAttachRt();
} }
@ -1790,8 +1893,9 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
OMAttachDs(DS); OMAttachDs(DS);
else else
OMAttachDs(); OMAttachDs();
}
} else { else
{
// Render in the backbuffer // Render in the backbuffer
OMSetFBO(0); OMSetFBO(0);
} }
@ -1818,11 +1922,13 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb) void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb)
{ {
GL_PUSH("UBO"); GL_PUSH("UBO");
if(m_vs_cb_cache.Update(vs_cb)) { if (m_vs_cb_cache.Update(vs_cb))
{
m_vs_cb->upload(vs_cb); m_vs_cb->upload(vs_cb);
} }
if(m_ps_cb_cache.Update(ps_cb)) { if (m_ps_cb_cache.Update(ps_cb))
{
m_ps_cb->upload(ps_cb); m_ps_cb->upload(ps_cb);
} }
} }
@ -1838,10 +1944,13 @@ void GSDeviceOGL::SetupPipeline(const VSSelector& vsel, const GSSelector& gsel,
GLuint ps; GLuint ps;
auto i = m_ps.find(psel); auto i = m_ps.find(psel);
if (i == m_ps.end()) { if (i == m_ps.end())
{
ps = CompilePS(psel); ps = CompilePS(psel);
m_ps[psel] = ps; m_ps[psel] = ps;
} else { }
else
{
ps = i->second; ps = i->second;
} }
@ -1852,9 +1961,12 @@ void GSDeviceOGL::SetupPipeline(const VSSelector& vsel, const GSSelector& gsel,
static PSSelector old_psel; static PSSelector old_psel;
static GLuint old_ps = 0; static GLuint old_ps = 0;
std::string msg(""); std::string msg("");
#define CHECK_STATE(p) if (psel.p != old_psel.p) msg.append(" ").append(#p); #define CHECK_STATE(p) \
if (psel.p != old_psel.p) \
msg.append(" ").append(#p);
if (old_ps != ps) { if (old_ps != ps)
{
CHECK_STATE(tex_fmt); CHECK_STATE(tex_fmt);
CHECK_STATE(dfmt); CHECK_STATE(dfmt);
@ -1922,7 +2034,8 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
std::string message(gl_message, gl_length >= 0 ? gl_length : strlen(gl_message)); std::string message(gl_message, gl_length >= 0 ? gl_length : strlen(gl_message));
std::string type, severity, source; std::string type, severity, source;
static int sev_counter = 0; static int sev_counter = 0;
switch(gl_type) { switch (gl_type)
{
case GL_DEBUG_TYPE_ERROR_ARB : type = "Error"; break; case GL_DEBUG_TYPE_ERROR_ARB : type = "Error"; break;
case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB : type = "Deprecated bhv"; break; case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB : type = "Deprecated bhv"; break;
case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB : type = "Undefined bhv"; break; case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB : type = "Undefined bhv"; break;
@ -1933,11 +2046,13 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
case GL_DEBUG_TYPE_POP_GROUP : return; // Don't print message injected by myself case GL_DEBUG_TYPE_POP_GROUP : return; // Don't print message injected by myself
default : type = "TTT"; break; default : type = "TTT"; break;
} }
switch(gl_severity) { switch (gl_severity)
{
case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; sev_counter++; break; case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; sev_counter++; break;
case GL_DEBUG_SEVERITY_MEDIUM_ARB : severity = "Mid"; break; case GL_DEBUG_SEVERITY_MEDIUM_ARB : severity = "Mid"; break;
case GL_DEBUG_SEVERITY_LOW_ARB : severity = "Low"; break; case GL_DEBUG_SEVERITY_LOW_ARB : severity = "Low"; break;
default : if (id == 0xFEAD) default:
if (id == 0xFEAD)
severity = "Cache"; severity = "Cache";
else if (id == 0xB0B0) else if (id == 0xB0B0)
severity = "REG"; severity = "REG";
@ -1945,7 +2060,8 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
severity = "EXTRA"; severity = "EXTRA";
break; break;
} }
switch(gl_source) { switch (gl_source)
{
case GL_DEBUG_SOURCE_API_ARB : source = "API"; break; case GL_DEBUG_SOURCE_API_ARB : source = "API"; break;
case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB : source = "WINDOW"; break; case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB : source = "WINDOW"; break;
case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB : source = "COMPILER"; break; case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB : source = "COMPILER"; break;
@ -1957,16 +2073,19 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
#ifdef _DEBUG #ifdef _DEBUG
// Don't spam noisy information on the terminal // Don't spam noisy information on the terminal
if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION) { if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION)
{
fprintf(stderr, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); fprintf(stderr, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
} }
#else #else
// Print nouveau shader compiler info // Print nouveau shader compiler info
if (GSState::s_n == 0) { if (GSState::s_n == 0)
{
int t, local, gpr, inst, byte; int t, local, gpr, inst, byte;
int status = sscanf(message.c_str(), "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d", int status = sscanf(message.c_str(), "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
&t, &local, &gpr, &inst, &byte); &t, &local, &gpr, &inst, &byte);
if (status == 5) { if (status == 5)
{
m_shader_inst += inst; m_shader_inst += inst;
m_shader_reg += gpr; m_shader_reg += gpr;
fprintf(stderr, "T:%s\t\tS:%s\t=> %s\n", type.c_str(), severity.c_str(), message.c_str()); fprintf(stderr, "T:%s\t\tS:%s\t=> %s\n", type.c_str(), severity.c_str(), message.c_str());
@ -1978,9 +2097,11 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
fprintf(m_debug_gl_file, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); fprintf(m_debug_gl_file, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
#ifdef _DEBUG #ifdef _DEBUG
if (sev_counter >= 5) { if (sev_counter >= 5)
{
// Close the file to flush the content on disk before exiting. // Close the file to flush the content on disk before exiting.
if (m_debug_gl_file) { if (m_debug_gl_file)
{
fclose(m_debug_gl_file); fclose(m_debug_gl_file);
m_debug_gl_file = NULL; m_debug_gl_file = NULL;
} }

View File

@ -34,7 +34,8 @@ extern uint64 g_real_texture_upload_byte;
extern uint64 g_vertex_upload_byte; extern uint64 g_vertex_upload_byte;
#endif #endif
class GSDepthStencilOGL { class GSDepthStencilOGL
{
bool m_depth_enable; bool m_depth_enable;
GLenum m_depth_func; GLenum m_depth_func;
bool m_depth_mask; bool m_depth_mask;
@ -44,8 +45,8 @@ class GSDepthStencilOGL {
GLenum m_stencil_spass_dpass_op; GLenum m_stencil_spass_dpass_op;
public: public:
GSDepthStencilOGL()
GSDepthStencilOGL() : m_depth_enable(false) : m_depth_enable(false)
, m_depth_func(GL_ALWAYS) , m_depth_func(GL_ALWAYS)
, m_depth_mask(0) , m_depth_mask(0)
, m_stencil_enable(false) , m_stencil_enable(false)
@ -57,12 +58,21 @@ public:
void EnableDepth() { m_depth_enable = true; } void EnableDepth() { m_depth_enable = true; }
void EnableStencil() { m_stencil_enable = true; } void EnableStencil() { m_stencil_enable = true; }
void SetDepth(GLenum func, bool mask) { m_depth_func = func; m_depth_mask = mask; } void SetDepth(GLenum func, bool mask)
void SetStencil(GLenum func, GLenum pass) { m_stencil_func = func; m_stencil_spass_dpass_op = pass; } {
m_depth_func = func;
m_depth_mask = mask;
}
void SetStencil(GLenum func, GLenum pass)
{
m_stencil_func = func;
m_stencil_spass_dpass_op = pass;
}
void SetupDepth() void SetupDepth()
{ {
if (GLState::depth != m_depth_enable) { if (GLState::depth != m_depth_enable)
{
GLState::depth = m_depth_enable; GLState::depth = m_depth_enable;
if (m_depth_enable) if (m_depth_enable)
glEnable(GL_DEPTH_TEST); glEnable(GL_DEPTH_TEST);
@ -70,12 +80,15 @@ public:
glDisable(GL_DEPTH_TEST); glDisable(GL_DEPTH_TEST);
} }
if (m_depth_enable) { if (m_depth_enable)
if (GLState::depth_func != m_depth_func) { {
if (GLState::depth_func != m_depth_func)
{
GLState::depth_func = m_depth_func; GLState::depth_func = m_depth_func;
glDepthFunc(m_depth_func); glDepthFunc(m_depth_func);
} }
if (GLState::depth_mask != m_depth_mask) { if (GLState::depth_mask != m_depth_mask)
{
GLState::depth_mask = m_depth_mask; GLState::depth_mask = m_depth_mask;
glDepthMask((GLboolean)m_depth_mask); glDepthMask((GLboolean)m_depth_mask);
} }
@ -84,7 +97,8 @@ public:
void SetupStencil() void SetupStencil()
{ {
if (GLState::stencil != m_stencil_enable) { if (GLState::stencil != m_stencil_enable)
{
GLState::stencil = m_stencil_enable; GLState::stencil = m_stencil_enable;
if (m_stencil_enable) if (m_stencil_enable)
glEnable(GL_STENCIL_TEST); glEnable(GL_STENCIL_TEST);
@ -92,13 +106,16 @@ public:
glDisable(GL_STENCIL_TEST); glDisable(GL_STENCIL_TEST);
} }
if (m_stencil_enable) { if (m_stencil_enable)
{
// Note: here the mask control which bitplane is considered by the operation // Note: here the mask control which bitplane is considered by the operation
if (GLState::stencil_func != m_stencil_func) { if (GLState::stencil_func != m_stencil_func)
{
GLState::stencil_func = m_stencil_func; GLState::stencil_func = m_stencil_func;
glStencilFunc(m_stencil_func, 1, 1); glStencilFunc(m_stencil_func, 1, 1);
} }
if (GLState::stencil_pass != m_stencil_spass_dpass_op) { if (GLState::stencil_pass != m_stencil_spass_dpass_op)
{
GLState::stencil_pass = m_stencil_spass_dpass_op; GLState::stencil_pass = m_stencil_spass_dpass_op;
glStencilOp(GL_KEEP, GL_KEEP, m_stencil_spass_dpass_op); glStencilOp(GL_KEEP, GL_KEEP, m_stencil_spass_dpass_op);
} }
@ -161,8 +178,14 @@ public:
operator uint32() const { return key; } operator uint32() const { return key; }
VSSelector() : key(0) {} VSSelector()
VSSelector(uint32 k) : key(k) {} : key(0)
{
}
VSSelector(uint32 k)
: key(k)
{
}
}; };
struct GSSelector struct GSSelector
@ -183,8 +206,14 @@ public:
operator uint32() const { return key; } operator uint32() const { return key; }
GSSelector() : key(0) {} GSSelector()
GSSelector(uint32 k) : key(k) {} : key(0)
{
}
GSSelector(uint32 k)
: key(k)
{
}
}; };
struct alignas(32) PSConstantBuffer struct alignas(32) PSConstantBuffer
@ -330,7 +359,10 @@ public:
// FIXME is the & useful ? // FIXME is the & useful ?
operator uint64() const { return key; } operator uint64() const { return key; }
PSSelector() : key(0) {} PSSelector()
: key(0)
{
}
}; };
struct PSSamplerSelector struct PSSamplerSelector
@ -353,8 +385,14 @@ public:
operator uint32() { return key; } operator uint32() { return key; }
PSSamplerSelector() : key(0) {} PSSamplerSelector()
PSSamplerSelector(uint32 k) : key(k) {} : key(0)
{
}
PSSamplerSelector(uint32 k)
: key(k)
{
}
}; };
struct OMDepthStencilSelector struct OMDepthStencilSelector
@ -377,8 +415,14 @@ public:
// FIXME is the & useful ? // FIXME is the & useful ?
operator uint32() { return key; } operator uint32() { return key; }
OMDepthStencilSelector() : key(0) {} OMDepthStencilSelector()
OMDepthStencilSelector(uint32 k) : key(k) {} : key(0)
{
}
OMDepthStencilSelector(uint32 k)
: key(k)
{
}
}; };
struct OMColorMaskSelector struct OMColorMaskSelector
@ -406,7 +450,10 @@ public:
// FIXME is the & useful ? // FIXME is the & useful ?
operator uint32() { return key & 0xf; } operator uint32() { return key & 0xf; }
OMColorMaskSelector() : key(0xF) {} OMColorMaskSelector()
: key(0xF)
{
}
OMColorMaskSelector(uint32 c) { wrgba = c; } OMColorMaskSelector(uint32 c) { wrgba = c; }
}; };
@ -441,17 +488,20 @@ private:
GSVertexBufferStateOGL* m_va; // state of the vertex buffer/array GSVertexBufferStateOGL* m_va; // state of the vertex buffer/array
struct { struct
{
GLuint ps[2]; // program object GLuint ps[2]; // program object
GSUniformBufferOGL* cb; // uniform buffer object GSUniformBufferOGL* cb; // uniform buffer object
} m_merge_obj; } m_merge_obj;
struct { struct
{
GLuint ps[4]; // program object GLuint ps[4]; // program object
GSUniformBufferOGL* cb; // uniform buffer object GSUniformBufferOGL* cb; // uniform buffer object
} m_interlace; } m_interlace;
struct { struct
{
GLuint vs; // program object GLuint vs; // program object
GLuint ps[ShaderConvert_Count]; // program object GLuint ps[ShaderConvert_Count]; // program object
GLuint ln; // sampler object GLuint ln; // sampler object
@ -461,26 +511,31 @@ private:
GSUniformBufferOGL* cb; GSUniformBufferOGL* cb;
} m_convert; } m_convert;
struct { struct
{
GLuint ps; GLuint ps;
GSUniformBufferOGL* cb; GSUniformBufferOGL* cb;
} m_fxaa; } m_fxaa;
struct { struct
{
GLuint ps; GLuint ps;
GSUniformBufferOGL* cb; GSUniformBufferOGL* cb;
} m_shaderfx; } m_shaderfx;
struct { struct
{
GSDepthStencilOGL* dss; GSDepthStencilOGL* dss;
GSTexture* t; GSTexture* t;
} m_date; } m_date;
struct { struct
{
GLuint ps; GLuint ps;
} m_shadeboost; } m_shadeboost;
struct { struct
{
uint16 last_query; uint16 last_query;
GLuint timer_query[1 << 16]; GLuint timer_query[1 << 16];

View File

@ -51,7 +51,8 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy)
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
if (m_userhacks_wildhack && !m_isPackedUV_HackFlag && PRIM->TME && PRIM->FST) { if (m_userhacks_wildhack && !m_isPackedUV_HackFlag && PRIM->TME && PRIM->FST)
{
for (unsigned int i = 0; i < m_vertex.next; i++) for (unsigned int i = 0; i < m_vertex.next; i++)
m_vertex.buff[i].UV &= 0x3FEF3FEF; m_vertex.buff[i].UV &= 0x3FEF3FEF;
} }
@ -62,7 +63,8 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy)
switch (m_vt.m_primclass) switch (m_vt.m_primclass)
{ {
case GS_POINT_CLASS: case GS_POINT_CLASS:
if (unscale_pt_ln) { if (unscale_pt_ln)
{
m_gs_sel.point = 1; m_gs_sel.point = 1;
vs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); vs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy);
} }
@ -71,7 +73,8 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy)
break; break;
case GS_LINE_CLASS: case GS_LINE_CLASS:
if (unscale_pt_ln) { if (unscale_pt_ln)
{
m_gs_sel.line = 1; m_gs_sel.line = 1;
vs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); vs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy);
} }
@ -95,11 +98,14 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy)
// the extra validation cost of the extra stage. // the extra validation cost of the extra stage.
// //
// Note: keep Geometry Shader in the replayer to ease debug. // Note: keep Geometry Shader in the replayer to ease debug.
if (GLLoader::found_geometry_shader && !m_vt.m_accurate_stq && (m_vertex.next > 32 || GLLoader::in_replayer)) { // <=> 16 sprites (based on Shadow Hearts) if (GLLoader::found_geometry_shader && !m_vt.m_accurate_stq && (m_vertex.next > 32 || GLLoader::in_replayer)) // <=> 16 sprites (based on Shadow Hearts)
{
m_gs_sel.sprite = 1; m_gs_sel.sprite = 1;
t = GL_LINES; t = GL_LINES;
} else { }
else
{
Lines2Sprites(); Lines2Sprites();
t = GL_TRIANGLES; t = GL_TRIANGLES;
@ -121,10 +127,13 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy)
void GSRendererOGL::EmulateZbuffer() void GSRendererOGL::EmulateZbuffer()
{ {
if (m_context->TEST.ZTE) { if (m_context->TEST.ZTE)
{
m_om_dssel.ztst = m_context->TEST.ZTST; m_om_dssel.ztst = m_context->TEST.ZTST;
m_om_dssel.zwe = !m_context->ZBUF.ZMSK; m_om_dssel.zwe = !m_context->ZBUF.ZMSK;
} else { }
else
{
m_om_dssel.ztst = ZTST_ALWAYS; m_om_dssel.ztst = ZTST_ALWAYS;
} }
@ -137,10 +146,14 @@ void GSRendererOGL::EmulateZbuffer()
//ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f); //ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f);
m_ps_sel.zclamp = 0; m_ps_sel.zclamp = 0;
if (clamp_z) { if (clamp_z)
if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS) { {
if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS)
{
vs_cb.MaxDepth = GSVector2i(max_z); vs_cb.MaxDepth = GSVector2i(max_z);
} else { }
else
{
ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, max_z * ldexpf(1, -32)); ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, max_z * ldexpf(1, -32));
m_ps_sel.zclamp = 1; m_ps_sel.zclamp = 1;
} }
@ -148,7 +161,8 @@ void GSRendererOGL::EmulateZbuffer()
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
// Minor optimization of a corner case (it allow to better emulate some alpha test effects) // Minor optimization of a corner case (it allow to better emulate some alpha test effects)
if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z) { if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z)
{
GL_DBG("Optimize Z test GEQUAL to ALWAYS (%s)", psm_str(m_context->ZBUF.PSM)); GL_DBG("Optimize Z test GEQUAL to ALWAYS (%s)", psm_str(m_context->ZBUF.PSM));
m_om_dssel.ztst = ZTST_ALWAYS; m_om_dssel.ztst = ZTST_ALWAYS;
} }
@ -159,7 +173,8 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask()
// Uncomment to disable texture shuffle emulation. // Uncomment to disable texture shuffle emulation.
// m_texture_shuffle = false; // m_texture_shuffle = false;
if (m_texture_shuffle) { if (m_texture_shuffle)
{
m_ps_sel.shuffle = 1; m_ps_sel.shuffle = 1;
m_ps_sel.dfmt = 0; m_ps_sel.dfmt = 0;
@ -185,11 +200,15 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask()
m_om_csel.wrgba = 0; m_om_csel.wrgba = 0;
// 2 Select the new mask (Please someone put SSE here) // 2 Select the new mask (Please someone put SSE here)
if (rg_mask != 0xFF) { if (rg_mask != 0xFF)
if (write_ba) { {
if (write_ba)
{
GL_INS("Color shuffle %s => B", read_ba ? "B" : "R"); GL_INS("Color shuffle %s => B", read_ba ? "B" : "R");
m_om_csel.wb = 1; m_om_csel.wb = 1;
} else { }
else
{
GL_INS("Color shuffle %s => R", read_ba ? "B" : "R"); GL_INS("Color shuffle %s => R", read_ba ? "B" : "R");
m_om_csel.wr = 1; m_om_csel.wr = 1;
} }
@ -197,11 +216,15 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask()
m_ps_sel.fbmask = 1; m_ps_sel.fbmask = 1;
} }
if (ba_mask != 0xFF) { if (ba_mask != 0xFF)
if (write_ba) { {
if (write_ba)
{
GL_INS("Color shuffle %s => A", read_ba ? "A" : "G"); GL_INS("Color shuffle %s => A", read_ba ? "A" : "G");
m_om_csel.wa = 1; m_om_csel.wa = 1;
} else { }
else
{
GL_INS("Color shuffle %s => G", read_ba ? "A" : "G"); GL_INS("Color shuffle %s => G", read_ba ? "A" : "G");
m_om_csel.wg = 1; m_om_csel.wg = 1;
} }
@ -209,25 +232,32 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask()
m_ps_sel.fbmask = 1; m_ps_sel.fbmask = 1;
} }
if (m_ps_sel.fbmask && m_sw_blending) { if (m_ps_sel.fbmask && m_sw_blending)
{
ps_cb.FbMask.r = rg_mask; ps_cb.FbMask.r = rg_mask;
ps_cb.FbMask.g = rg_mask; ps_cb.FbMask.g = rg_mask;
ps_cb.FbMask.b = ba_mask; ps_cb.FbMask.b = ba_mask;
ps_cb.FbMask.a = ba_mask; ps_cb.FbMask.a = ba_mask;
// No blending so hit unsafe path. // No blending so hit unsafe path.
if (!PRIM->ABE) { if (!PRIM->ABE)
{
GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask); GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask);
m_require_one_barrier = true; m_require_one_barrier = true;
} else { }
else
{
GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask);
m_require_full_barrier = true; m_require_full_barrier = true;
} }
} else { }
else
{
m_ps_sel.fbmask = 0; m_ps_sel.fbmask = 0;
} }
}
} else { else
{
m_ps_sel.dfmt = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt; m_ps_sel.dfmt = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt;
GSVector4i fbmask_v = GSVector4i::load((int)m_context->FRAME.FBMSK); GSVector4i fbmask_v = GSVector4i::load((int)m_context->FRAME.FBMSK);
@ -238,7 +268,8 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask()
m_ps_sel.fbmask = m_sw_blending && (~ff_fbmask & ~zero_fbmask & 0xF); m_ps_sel.fbmask = m_sw_blending && (~ff_fbmask & ~zero_fbmask & 0xF);
if (m_ps_sel.fbmask) { if (m_ps_sel.fbmask)
{
ps_cb.FbMask = fbmask_v.u8to32(); ps_cb.FbMask = fbmask_v.u8to32();
// Only alpha is special here, I think we can take a very unsafe shortcut // Only alpha is special here, I think we can take a very unsafe shortcut
// Alpha isn't blended on the GS but directly copyied into the RT. // Alpha isn't blended on the GS but directly copyied into the RT.
@ -260,11 +291,14 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask()
have been invalidated before subsequent Draws are executed. have been invalidated before subsequent Draws are executed.
*/ */
// No blending so hit unsafe path. // No blending so hit unsafe path.
if (!PRIM->ABE || !(~ff_fbmask & ~zero_fbmask & 0x7)) { if (!PRIM->ABE || !(~ff_fbmask & ~zero_fbmask & 0x7))
{
GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32);
m_require_one_barrier = true; m_require_one_barrier = true;
} else { }
else
{
// The safe and accurate path (but slow) // The safe and accurate path (but slow)
GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32);
@ -282,14 +316,19 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
// m_channel_shuffle = false; // m_channel_shuffle = false;
// First let's check we really have a channel shuffle effect // First let's check we really have a channel shuffle effect
if (m_channel_shuffle) { if (m_channel_shuffle)
if (m_game.title == CRC::GT4 || m_game.title == CRC::GT3 || m_game.title == CRC::GTConcept || m_game.title == CRC::TouristTrophy) { {
if (m_game.title == CRC::GT4 || m_game.title == CRC::GT3 || m_game.title == CRC::GTConcept || m_game.title == CRC::TouristTrophy)
{
GL_INS("Gran Turismo RGB Channel"); GL_INS("Gran Turismo RGB Channel");
m_ps_sel.channel = ChannelFetch_RGB; m_ps_sel.channel = ChannelFetch_RGB;
m_context->TEX0.TFX = TFX_DECAL; m_context->TEX0.TFX = TFX_DECAL;
*rt = tex->m_from_target; *rt = tex->m_from_target;
} else if (m_game.title == CRC::Tekken5) { }
if (m_context->FRAME.FBW == 1) { else if (m_game.title == CRC::Tekken5)
{
if (m_context->FRAME.FBW == 1)
{
// Used in stages: Secret Garden, Acid Rain, Moonlit Wilderness // Used in stages: Secret Garden, Acid Rain, Moonlit Wilderness
GL_INS("Tekken5 RGB Channel"); GL_INS("Tekken5 RGB Channel");
m_ps_sel.channel = ChannelFetch_RGB; m_ps_sel.channel = ChannelFetch_RGB;
@ -298,23 +337,32 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
// Minus current draw call // Minus current draw call
m_skip = 12 * (3 + 3 + 1) - 1; m_skip = 12 * (3 + 3 + 1) - 1;
*rt = tex->m_from_target; *rt = tex->m_from_target;
} else { }
else
{
// Could skip model drawing if wrongly detected // Could skip model drawing if wrongly detected
m_channel_shuffle = false; m_channel_shuffle = false;
} }
} else if ((tex->m_texture->GetType() == GSTexture::DepthStencil) && !(tex->m_32_bits_fmt)) { }
else if ((tex->m_texture->GetType() == GSTexture::DepthStencil) && !(tex->m_32_bits_fmt))
{
// So far 2 games hit this code path. Urban Chaos and Tales of Abyss // So far 2 games hit this code path. Urban Chaos and Tales of Abyss
// UC: will copy depth to green channel // UC: will copy depth to green channel
// ToA: will copy depth to alpha channel // ToA: will copy depth to alpha channel
if ((m_context->FRAME.FBMSK & 0xFF0000) == 0xFF0000) { if ((m_context->FRAME.FBMSK & 0xFF0000) == 0xFF0000)
{
// Green channel is masked // Green channel is masked
GL_INS("Tales Of Abyss Crazyness (MSB 16b depth to Alpha)"); GL_INS("Tales Of Abyss Crazyness (MSB 16b depth to Alpha)");
m_ps_sel.tales_of_abyss_hle = 1; m_ps_sel.tales_of_abyss_hle = 1;
} else { }
else
{
GL_INS("Urban Chaos Crazyness (Green extraction)"); GL_INS("Urban Chaos Crazyness (Green extraction)");
m_ps_sel.urban_chaos_hle = 1; m_ps_sel.urban_chaos_hle = 1;
} }
} else if (m_index.tail <= 64 && m_context->CLAMP.WMT == 3) { }
else if (m_index.tail <= 64 && m_context->CLAMP.WMT == 3)
{
// Blood will tell. I think it is channel effect too but again // Blood will tell. I think it is channel effect too but again
// implemented in a different way. I don't want to add more CRC stuff. So // implemented in a different way. I don't want to add more CRC stuff. So
// let's disable channel when the signature is different // let's disable channel when the signature is different
@ -323,23 +371,29 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
// handled above. // handled above.
GL_INS("Maybe not a channel!"); GL_INS("Maybe not a channel!");
m_channel_shuffle = false; m_channel_shuffle = false;
} else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MAXU & 0x8) == 8)) { }
else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MAXU & 0x8) == 8))
{
// Read either blue or Alpha. Let's go for Blue ;) // Read either blue or Alpha. Let's go for Blue ;)
// MGS3/Kill Zone // MGS3/Kill Zone
GL_INS("Blue channel"); GL_INS("Blue channel");
m_ps_sel.channel = ChannelFetch_BLUE; m_ps_sel.channel = ChannelFetch_BLUE;
} else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MINU & 0x8) == 0)) { }
else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MINU & 0x8) == 0))
{
// Read either Red or Green. Let's check the V coordinate. 0-1 is likely top so // Read either Red or Green. Let's check the V coordinate. 0-1 is likely top so
// red. 2-3 is likely bottom so green (actually depends on texture base pointer offset) // red. 2-3 is likely bottom so green (actually depends on texture base pointer offset)
bool green = PRIM->FST && (m_vertex.buff[0].V & 32); bool green = PRIM->FST && (m_vertex.buff[0].V & 32);
if (green && (m_context->FRAME.FBMSK & 0x00FFFFFF) == 0x00FFFFFF) { if (green && (m_context->FRAME.FBMSK & 0x00FFFFFF) == 0x00FFFFFF)
{
// Typically used in Terminator 3 // Typically used in Terminator 3
int blue_mask = m_context->FRAME.FBMSK >> 24; int blue_mask = m_context->FRAME.FBMSK >> 24;
int green_mask = ~blue_mask & 0xFF; int green_mask = ~blue_mask & 0xFF;
int blue_shift = -1; int blue_shift = -1;
// Note: potentially we could also check the value of the clut // Note: potentially we could also check the value of the clut
switch (m_context->FRAME.FBMSK >> 24) { switch (m_context->FRAME.FBMSK >> 24)
{
case 0xFF: ASSERT(0); break; case 0xFF: ASSERT(0); break;
case 0xFE: blue_shift = 1; break; case 0xFE: blue_shift = 1; break;
case 0xFC: blue_shift = 2; break; case 0xFC: blue_shift = 2; break;
@ -354,31 +408,40 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
int green_shift = 8 - blue_shift; int green_shift = 8 - blue_shift;
dev->SetupCBMisc(GSVector4i(blue_mask, blue_shift, green_mask, green_shift)); dev->SetupCBMisc(GSVector4i(blue_mask, blue_shift, green_mask, green_shift));
if (blue_shift >= 0) { if (blue_shift >= 0)
{
GL_INS("Green/Blue channel (%d, %d)", blue_shift, green_shift); GL_INS("Green/Blue channel (%d, %d)", blue_shift, green_shift);
m_ps_sel.channel = ChannelFetch_GXBY; m_ps_sel.channel = ChannelFetch_GXBY;
m_context->FRAME.FBMSK = 0x00FFFFFF; m_context->FRAME.FBMSK = 0x00FFFFFF;
} else { }
else
{
GL_INS("Green channel (wrong mask) (fbmask %x)", m_context->FRAME.FBMSK >> 24); GL_INS("Green channel (wrong mask) (fbmask %x)", m_context->FRAME.FBMSK >> 24);
m_ps_sel.channel = ChannelFetch_GREEN; m_ps_sel.channel = ChannelFetch_GREEN;
} }
}
} else if (green) { else if (green)
{
GL_INS("Green channel"); GL_INS("Green channel");
m_ps_sel.channel = ChannelFetch_GREEN; m_ps_sel.channel = ChannelFetch_GREEN;
} else { }
else
{
// Pop // Pop
GL_INS("Red channel"); GL_INS("Red channel");
m_ps_sel.channel = ChannelFetch_RED; m_ps_sel.channel = ChannelFetch_RED;
} }
} else { }
else
{
GL_INS("Channel not supported"); GL_INS("Channel not supported");
m_channel_shuffle = false; m_channel_shuffle = false;
} }
} }
// Effect is really a channel shuffle effect so let's cheat a little // Effect is really a channel shuffle effect so let's cheat a little
if (m_channel_shuffle) { if (m_channel_shuffle)
{
dev->PSSetShaderResource(4, tex->m_from_target); dev->PSSetShaderResource(4, tex->m_from_target);
m_require_one_barrier = true; m_require_one_barrier = true;
@ -395,8 +458,9 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_vertex.head = m_vertex.tail = m_vertex.next = 2;
m_index.tail = 2; m_index.tail = 2;
}
} else { else
{
#ifdef ENABLE_OGL_DEBUG #ifdef ENABLE_OGL_DEBUG
dev->PSSetShaderResource(4, NULL); dev->PSSetShaderResource(4, NULL);
#endif #endif
@ -410,12 +474,14 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
bool sw_blending = false; bool sw_blending = false;
// No blending so early exit // No blending so early exit
if (!(PRIM->ABE || m_env.PABE.PABE || (PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS))) { if (!(PRIM->ABE || m_env.PABE.PABE || (PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS)))
{
dev->OMSetBlendState(); dev->OMSetBlendState();
return; return;
} }
if (m_env.PABE.PABE) { if (m_env.PABE.PABE)
{
// Breath of Fire Dragon Quarter, Strawberry Shortcake, Super Robot Wars, Cartoon Network Racing. // Breath of Fire Dragon Quarter, Strawberry Shortcake, Super Robot Wars, Cartoon Network Racing.
GL_INS("PABE mode ENABLED"); GL_INS("PABE mode ENABLED");
m_ps_sel.pabe = 1; m_ps_sel.pabe = 1;
@ -438,12 +504,14 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
// Warning no break on purpose // Warning no break on purpose
// Note: the [[fallthrough]] attribute tells compilers not to complain about not having breaks. // Note: the [[fallthrough]] attribute tells compilers not to complain about not having breaks.
switch (m_sw_blending) { switch (m_sw_blending)
{
case ACC_BLEND_ULTRA: case ACC_BLEND_ULTRA:
sw_blending |= true; sw_blending |= true;
[[fallthrough]]; [[fallthrough]];
case ACC_BLEND_FULL: case ACC_BLEND_FULL:
if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) GetAlphaMinMax(); if (!m_vt.m_alpha.valid && (ALPHA.C == 0))
GetAlphaMinMax();
sw_blending |= (ALPHA.A != ALPHA.B) && ((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u)); sw_blending |= (ALPHA.A != ALPHA.B) && ((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u));
[[fallthrough]]; [[fallthrough]];
case ACC_BLEND_HIGH: case ACC_BLEND_HIGH:
@ -463,29 +531,37 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
} }
// Color clip // Color clip
if (m_env.COLCLAMP.CLAMP == 0) { if (m_env.COLCLAMP.CLAMP == 0)
{
// Safe FBMASK, avoid hitting accumulation mode on 16bit, // Safe FBMASK, avoid hitting accumulation mode on 16bit,
// fixes shadows in Superman shadows of Apokolips. // fixes shadows in Superman shadows of Apokolips.
const bool sw_fbmask_colclip = !m_require_one_barrier && m_ps_sel.fbmask; const bool sw_fbmask_colclip = !m_require_one_barrier && m_ps_sel.fbmask;
const bool free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive || sw_fbmask_colclip; const bool free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive || sw_fbmask_colclip;
GL_DBG("COLCLIP Info (Blending: %d/%d/%d/%d, SW FBMASK: %d, OVERLAP: %d)", GL_DBG("COLCLIP Info (Blending: %d/%d/%d/%d, SW FBMASK: %d, OVERLAP: %d)",
ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, sw_fbmask_colclip, m_prim_overlap); ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, sw_fbmask_colclip, m_prim_overlap);
if (free_colclip) { if (free_colclip)
{
// The fastest algo that requires a single pass // The fastest algo that requires a single pass
GL_INS("COLCLIP Free mode ENABLED"); GL_INS("COLCLIP Free mode ENABLED");
m_ps_sel.colclip = 1; m_ps_sel.colclip = 1;
sw_blending = true; sw_blending = true;
accumulation_blend = false; // disable the HDR algo accumulation_blend = false; // disable the HDR algo
} else if (accumulation_blend) { }
else if (accumulation_blend)
{
// A fast algo that requires 2 passes // A fast algo that requires 2 passes
GL_INS("COLCLIP Fast HDR mode ENABLED"); GL_INS("COLCLIP Fast HDR mode ENABLED");
m_ps_sel.hdr = 1; m_ps_sel.hdr = 1;
sw_blending = true; // Enable sw blending for the HDR algo sw_blending = true; // Enable sw blending for the HDR algo
} else if (sw_blending) { }
else if (sw_blending)
{
// A slow algo that could require several passes (barely used) // A slow algo that could require several passes (barely used)
GL_INS("COLCLIP SW mode ENABLED"); GL_INS("COLCLIP SW mode ENABLED");
m_ps_sel.colclip = 1; m_ps_sel.colclip = 1;
} else { }
else
{
GL_INS("COLCLIP HDR mode ENABLED"); GL_INS("COLCLIP HDR mode ENABLED");
m_ps_sel.hdr = 1; m_ps_sel.hdr = 1;
} }
@ -497,7 +573,8 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
// Switch DATE_GL42 with DATE_GL45 in such cases to ensure accuracy. // Switch DATE_GL42 with DATE_GL45 in such cases to ensure accuracy.
// No mix of COLCLIP + sw blend + DATE_GL42, neither sw fbmask + DATE_GL42. // No mix of COLCLIP + sw blend + DATE_GL42, neither sw fbmask + DATE_GL42.
// Note: Do the swap after colclip to avoid adding extra conditions. // Note: Do the swap after colclip to avoid adding extra conditions.
if (sw_blending && DATE_GL42) { if (sw_blending && DATE_GL42)
{
GL_PERF("DATE: Swap DATE_GL42 with DATE_GL45"); GL_PERF("DATE: Swap DATE_GL42 with DATE_GL45");
m_require_full_barrier = true; m_require_full_barrier = true;
DATE_GL42 = false; DATE_GL42 = false;
@ -509,16 +586,19 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (drawlist %d) (sw %d)", GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (drawlist %d) (sw %d)",
ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending); ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending);
#endif #endif
if (sw_blending) { if (sw_blending)
{
m_ps_sel.blend_a = ALPHA.A; m_ps_sel.blend_a = ALPHA.A;
m_ps_sel.blend_b = ALPHA.B; m_ps_sel.blend_b = ALPHA.B;
m_ps_sel.blend_c = ALPHA.C; m_ps_sel.blend_c = ALPHA.C;
m_ps_sel.blend_d = ALPHA.D; m_ps_sel.blend_d = ALPHA.D;
if (accumulation_blend) { if (accumulation_blend)
{
// Keep HW blending to do the addition/subtraction // Keep HW blending to do the addition/subtraction
dev->OMSetBlendState(blend_index, 0, false, true); dev->OMSetBlendState(blend_index, 0, false, true);
if (ALPHA.A == 2) { if (ALPHA.A == 2)
{
// The blend unit does a reverse subtraction so it means // The blend unit does a reverse subtraction so it means
// the shader must output a positive value. // the shader must output a positive value.
// Replace 0 - Cs by Cs - 0 // Replace 0 - Cs by Cs - 0
@ -529,8 +609,9 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
m_ps_sel.blend_d = 2; m_ps_sel.blend_d = 2;
// Note accumulation_blend doesn't require a barrier // Note accumulation_blend doesn't require a barrier
}
} else { else
{
// Disable HW blending // Disable HW blending
dev->OMSetBlendState(); dev->OMSetBlendState();
@ -538,16 +619,22 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
} }
// Require the fix alpha value // Require the fix alpha value
if (ALPHA.C == 2) { if (ALPHA.C == 2)
{
ps_cb.TA_Af.a = (float)ALPHA.FIX / 128.0f; ps_cb.TA_Af.a = (float)ALPHA.FIX / 128.0f;
} }
} else { }
else
{
m_ps_sel.clr1 = !!(blend_flag & BLEND_C_CLR); m_ps_sel.clr1 = !!(blend_flag & BLEND_C_CLR);
if (m_ps_sel.dfmt == 1 && ALPHA.C == 1) { if (m_ps_sel.dfmt == 1 && ALPHA.C == 1)
{
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent // 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
const uint8 hacked_blend_index = blend_index + 3; // +3 <=> +1 on C const uint8 hacked_blend_index = blend_index + 3; // +3 <=> +1 on C
dev->OMSetBlendState(hacked_blend_index, 128, true); dev->OMSetBlendState(hacked_blend_index, 128, true);
} else { }
else
{
dev->OMSetBlendState(blend_index, ALPHA.FIX, (ALPHA.C == 2)); dev->OMSetBlendState(blend_index, ALPHA.FIX, (ALPHA.C == 2));
} }
} }
@ -581,7 +668,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
break; break;
case TriFiltering::PS2: case TriFiltering::PS2:
if (need_mipmap && m_mipmap != 2) { if (need_mipmap && m_mipmap != 2)
{
trilinear = m_context->TEX1.MMIN; trilinear = m_context->TEX1.MMIN;
trilinear_auto = true; trilinear_auto = true;
} }
@ -603,14 +691,16 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
// Performance note: // Performance note:
// 1/ Don't set 0 as it is the default value // 1/ Don't set 0 as it is the default value
// 2/ Only keep aem when it is useful (avoid useless shader permutation) // 2/ Only keep aem when it is useful (avoid useless shader permutation)
if (m_ps_sel.shuffle) { if (m_ps_sel.shuffle)
{
// Force a 32 bits access (normally shuffle is done on 16 bits) // Force a 32 bits access (normally shuffle is done on 16 bits)
// m_ps_sel.tex_fmt = 0; // removed as an optimization // m_ps_sel.tex_fmt = 0; // removed as an optimization
m_ps_sel.aem = m_env.TEXA.AEM; m_ps_sel.aem = m_env.TEXA.AEM;
ASSERT(tex->m_target); ASSERT(tex->m_target);
// Require a float conversion if the texture is a depth otherwise uses Integral scaling // Require a float conversion if the texture is a depth otherwise uses Integral scaling
if (psm.depth) { if (psm.depth)
{
m_ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : 1; m_ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : 1;
m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate
} }
@ -626,8 +716,9 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
bilinear &= m_vt.IsLinear(); bilinear &= m_vt.IsLinear();
vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex); vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex);
}
} else if (tex->m_target) { else if (tex->m_target)
{
// Use an old target. AEM and index aren't resolved it must be done // Use an old target. AEM and index aren't resolved it must be done
// on the GPU // on the GPU
@ -636,7 +727,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
m_ps_sel.aem = m_env.TEXA.AEM; m_ps_sel.aem = m_env.TEXA.AEM;
// Don't upload AEM if format is 32 bits // Don't upload AEM if format is 32 bits
if (cpsm.fmt) { if (cpsm.fmt)
{
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
ta /= 255.0f; ta /= 255.0f;
// FIXME rely on compiler for the optimization // FIXME rely on compiler for the optimization
@ -645,7 +737,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
} }
// Select the index format // Select the index format
if (tex->m_palette) { if (tex->m_palette)
{
// FIXME Potentially improve fmt field in GSLocalMemory // FIXME Potentially improve fmt field in GSLocalMemory
if (m_context->TEX0.PSM == PSM_PSMT4HL) if (m_context->TEX0.PSM == PSM_PSMT4HL)
m_ps_sel.tex_fmt |= 1 << 2; m_ps_sel.tex_fmt |= 1 << 2;
@ -661,14 +754,17 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
} }
// Depth format // Depth format
if (tex->m_texture->GetType() == GSTexture::DepthStencil) { if (tex->m_texture->GetType() == GSTexture::DepthStencil)
{
// Require a float conversion if the texture is a depth format // Require a float conversion if the texture is a depth format
m_ps_sel.depth_fmt = (psm.bpp == 16) ? 2 : 1; m_ps_sel.depth_fmt = (psm.bpp == 16) ? 2 : 1;
m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate
// Don't force interpolation on depth format // Don't force interpolation on depth format
bilinear &= m_vt.IsLinear(); bilinear &= m_vt.IsLinear();
} else if (psm.depth) { }
else if (psm.depth)
{
// Use Integral scaling // Use Integral scaling
m_ps_sel.depth_fmt = 3; m_ps_sel.depth_fmt = 3;
m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate
@ -678,25 +774,30 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
} }
vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex); vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex);
}
} else if (tex->m_palette) { else if (tex->m_palette)
{
// Use a standard 8 bits texture. AEM is already done on the CLUT // Use a standard 8 bits texture. AEM is already done on the CLUT
// Therefore you only need to set the index // Therefore you only need to set the index
// m_ps_sel.aem = 0; // removed as an optimization // m_ps_sel.aem = 0; // removed as an optimization
// Note 4 bits indexes are converted to 8 bits // Note 4 bits indexes are converted to 8 bits
m_ps_sel.tex_fmt = 3 << 2; m_ps_sel.tex_fmt = 3 << 2;
}
} else { else
{
// Standard texture. Both index and AEM expansion were already done by the CPU. // Standard texture. Both index and AEM expansion were already done by the CPU.
// m_ps_sel.tex_fmt = 0; // removed as an optimization // m_ps_sel.tex_fmt = 0; // removed as an optimization
// m_ps_sel.aem = 0; // removed as an optimization // m_ps_sel.aem = 0; // removed as an optimization
} }
if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128)))
{
// Micro optimization that reduces GPU load (removes 5 instructions on the FS program) // Micro optimization that reduces GPU load (removes 5 instructions on the FS program)
m_ps_sel.tfx = TFX_DECAL; m_ps_sel.tfx = TFX_DECAL;
} else { }
else
{
m_ps_sel.tfx = m_context->TEX0.TFX; m_ps_sel.tfx = m_context->TEX0.TFX;
} }
@ -717,16 +818,21 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
ps_cb.WH = WH; ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
if (complex_wms_wmt) { if (complex_wms_wmt)
{
ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);
ps_cb.MinMax = GSVector4(ps_cb.MskFix) / WH.xyxy(); ps_cb.MinMax = GSVector4(ps_cb.MskFix) / WH.xyxy();
} else if (trilinear_manual) { }
else if (trilinear_manual)
{
// Reuse MinMax for mipmap parameter to avoid an extension of the UBO // Reuse MinMax for mipmap parameter to avoid an extension of the UBO
ps_cb.MinMax.x = (float)m_context->TEX1.K / 16.0f; ps_cb.MinMax.x = (float)m_context->TEX1.K / 16.0f;
ps_cb.MinMax.y = float(1 << m_context->TEX1.L); ps_cb.MinMax.y = float(1 << m_context->TEX1.L);
ps_cb.MinMax.z = float(m_lod.x); // Offset because first layer is m_lod, dunno if we can do better ps_cb.MinMax.z = float(m_lod.x); // Offset because first layer is m_lod, dunno if we can do better
ps_cb.MinMax.w = float(m_lod.y); ps_cb.MinMax.w = float(m_lod.y);
} else if (trilinear_auto) { }
else if (trilinear_auto)
{
tex->m_texture->GenerateMipmap(); tex->m_texture->GenerateMipmap();
} }
@ -735,7 +841,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
ps_cb.TC_OH_TS = GSVector4(1 / 16.0f, 1 / 16.0f, m_userhacks_tcoffset_x, m_userhacks_tcoffset_y) / WH.xyxy(); ps_cb.TC_OH_TS = GSVector4(1 / 16.0f, 1 / 16.0f, m_userhacks_tcoffset_x, m_userhacks_tcoffset_y) / WH.xyxy();
// Must be done after all coordinates math // Must be done after all coordinates math
if (m_context->HasFixedTEX0() && !PRIM->FST) { if (m_context->HasFixedTEX0() && !PRIM->FST)
{
m_ps_sel.invalid_tex0 = 1; m_ps_sel.invalid_tex0 = 1;
// Use invalid size to denormalize ST coordinate // Use invalid size to denormalize ST coordinate
ps_cb.WH.x = (float)(1 << m_context->stack.TEX0.TW); ps_cb.WH.x = (float)(1 << m_context->stack.TEX0.TW);
@ -748,20 +855,26 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
// Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader // Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader
m_ps_ssel.tau = (wms != CLAMP_CLAMP); m_ps_ssel.tau = (wms != CLAMP_CLAMP);
m_ps_ssel.tav = (wmt != CLAMP_CLAMP); m_ps_ssel.tav = (wmt != CLAMP_CLAMP);
if (shader_emulated_sampler) { if (shader_emulated_sampler)
{
m_ps_ssel.biln = 0; m_ps_ssel.biln = 0;
m_ps_ssel.aniso = 0; m_ps_ssel.aniso = 0;
m_ps_ssel.triln = 0; m_ps_ssel.triln = 0;
} else { }
else
{
m_ps_ssel.biln = bilinear; m_ps_ssel.biln = bilinear;
// Aniso filtering doesn't work with textureLod so use texture (automatic_lod) instead. // Aniso filtering doesn't work with textureLod so use texture (automatic_lod) instead.
// Enable aniso only for triangles. Sprites are flat so aniso is likely useless (it would save perf for other primitives). // Enable aniso only for triangles. Sprites are flat so aniso is likely useless (it would save perf for other primitives).
const bool anisotropic = m_vt.m_primclass == GS_TRIANGLE_CLASS && !trilinear_manual; const bool anisotropic = m_vt.m_primclass == GS_TRIANGLE_CLASS && !trilinear_manual;
m_ps_ssel.aniso = anisotropic; m_ps_ssel.aniso = anisotropic;
m_ps_ssel.triln = trilinear; m_ps_ssel.triln = trilinear;
if (trilinear_manual) { if (trilinear_manual)
{
m_ps_sel.manual_lod = 1; m_ps_sel.manual_lod = 1;
} else if (trilinear_auto || anisotropic) { }
else if (trilinear_auto || anisotropic)
{
m_ps_sel.automatic_lod = 1; m_ps_sel.automatic_lod = 1;
} }
} }
@ -788,7 +901,8 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap()
m_drawlist.clear(); m_drawlist.clear();
size_t i = 0; size_t i = 0;
while (i < count) { while (i < count)
{
// In order to speed up comparison a bounding-box is accumulated. It removes a // In order to speed up comparison a bounding-box is accumulated. It removes a
// loop so code is much faster (check game virtua fighter). Besides it allow to check // loop so code is much faster (check game virtua fighter). Besides it allow to check
// properly the Y order. // properly the Y order.
@ -801,7 +915,8 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap()
all = all.xyxy().blend(all.zwzw(), all > all.zwxy()); all = all.xyxy().blend(all.zwzw(), all > all.zwxy());
size_t j = i + 2; size_t j = i + 2;
while (j < count) { while (j < count)
{
GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j + 1].m[1])).upl16().xzyw(); GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j + 1].m[1])).upl16().xzyw();
sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy()); sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy());
@ -812,9 +927,12 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap()
ASSERT(all.x <= all.z); ASSERT(all.x <= all.z);
ASSERT(all.y <= all.w); ASSERT(all.y <= all.w);
if (all.rintersect(sprite).rempty()) { if (all.rintersect(sprite).rempty())
{
all = all.runion_ordered(sprite); all = all.runion_ordered(sprite);
} else { }
else
{
overlap = PRIM_OVERLAP_YES; overlap = PRIM_OVERLAP_YES;
break; break;
} }
@ -873,18 +991,25 @@ void GSRendererOGL::SendDraw()
{ {
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
if (!m_require_full_barrier && m_require_one_barrier) { if (!m_require_full_barrier && m_require_one_barrier)
{
// Need only a single barrier // Need only a single barrier
glTextureBarrier(); glTextureBarrier();
dev->DrawIndexedPrimitive(); dev->DrawIndexedPrimitive();
} else if (!m_require_full_barrier) { }
else if (!m_require_full_barrier)
{
// Don't need any barrier // Don't need any barrier
dev->DrawIndexedPrimitive(); dev->DrawIndexedPrimitive();
} else if (m_prim_overlap == PRIM_OVERLAP_NO) { }
else if (m_prim_overlap == PRIM_OVERLAP_NO)
{
// Need full barrier but a single barrier will be enough // Need full barrier but a single barrier will be enough
glTextureBarrier(); glTextureBarrier();
dev->DrawIndexedPrimitive(); dev->DrawIndexedPrimitive();
} else if (m_vt.m_primclass == GS_SPRITE_CLASS) { }
else if (m_vt.m_primclass == GS_SPRITE_CLASS)
{
size_t nb_vertex = (m_gs_sel.sprite == 1) ? 2 : 6; size_t nb_vertex = (m_gs_sel.sprite == 1) ? 2 : 6;
GL_PUSH("Split the draw (SPRITE)"); GL_PUSH("Split the draw (SPRITE)");
@ -903,12 +1028,15 @@ void GSRendererOGL::SendDraw()
m_index.tail / nb_vertex, m_drawlist.size(), message.c_str()); m_index.tail / nb_vertex, m_drawlist.size(), message.c_str());
#endif #endif
for (size_t count, p = 0, n = 0; n < m_drawlist.size(); p += count, ++n) { for (size_t count, p = 0, n = 0; n < m_drawlist.size(); p += count, ++n)
{
count = m_drawlist[n] * nb_vertex; count = m_drawlist[n] * nb_vertex;
glTextureBarrier(); glTextureBarrier();
dev->DrawIndexedPrimitive(p, count); dev->DrawIndexedPrimitive(p, count);
} }
} else { }
else
{
// FIXME: Investigate: a dynamic check to pack as many primitives as possible // FIXME: Investigate: a dynamic check to pack as many primitives as possible
// I'm nearly sure GSdx already has this kind of code (maybe we can adapt GSDirtyRect) // I'm nearly sure GSdx already has this kind of code (maybe we can adapt GSDirtyRect)
size_t nb_vertex = GSUtil::GetClassVertexCount(m_vt.m_primclass); size_t nb_vertex = GSUtil::GetClassVertexCount(m_vt.m_primclass);
@ -917,7 +1045,8 @@ void GSRendererOGL::SendDraw()
GL_PERF("Split single draw in %d draw", m_index.tail / nb_vertex); GL_PERF("Split single draw in %d draw", m_index.tail / nb_vertex);
for (size_t p = 0; p < m_index.tail; p += nb_vertex) { for (size_t p = 0; p < m_index.tail; p += nb_vertex)
{
glTextureBarrier(); glTextureBarrier();
dev->DrawIndexedPrimitive(p, nb_vertex); dev->DrawIndexedPrimitive(p, nb_vertex);
} }
@ -948,8 +1077,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
tex && tex->m_texture ? tex->m_texture->GetID() : -1, tex && tex->m_texture ? tex->m_texture->GetID() : -1,
area_in.x, area_in.y, area_in.z, area_in.w, area_in.x, area_in.y, area_in.z, area_in.w,
rt ? rt->GetID() : -1, ds ? ds->GetID() : -1, rt ? rt->GetID() : -1, ds ? ds->GetID() : -1,
area_out.x, area_out.y, area_out.z, area_out.w area_out.x, area_out.y, area_out.z, area_out.w);
);
#endif #endif
GSTexture* hdr_rt = NULL; GSTexture* hdr_rt = NULL;
@ -985,15 +1113,19 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
m_prim_overlap = PrimitiveOverlap(); m_prim_overlap = PrimitiveOverlap();
// Detect framebuffer read that will need special handling // Detect framebuffer read that will need special handling
if ((m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && m_sw_blending) { if ((m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && m_sw_blending)
if ((m_context->FRAME.FBMSK == 0x00FFFFFF) && (m_vt.m_primclass == GS_TRIANGLE_CLASS)) { {
if ((m_context->FRAME.FBMSK == 0x00FFFFFF) && (m_vt.m_primclass == GS_TRIANGLE_CLASS))
{
// This pattern is used by several games to emulate a stencil (shadow) // This pattern is used by several games to emulate a stencil (shadow)
// Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1 // Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1
// Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1 // Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1
GL_DBG("Source and Target are the same! Let's sample the framebuffer"); GL_DBG("Source and Target are the same! Let's sample the framebuffer");
m_ps_sel.tex_is_fb = 1; m_ps_sel.tex_is_fb = 1;
m_require_full_barrier = true; m_require_full_barrier = true;
} else if (m_prim_overlap != PRIM_OVERLAP_NO) { }
else if (m_prim_overlap != PRIM_OVERLAP_NO)
{
// Note: It is fine if the texture fits in a single GS page. First access will cache // Note: It is fine if the texture fits in a single GS page. First access will cache
// the page in the GS texture buffer. // the page in the GS texture buffer.
GL_INS("ERROR: Source and Target are the same!"); GL_INS("ERROR: Source and Target are the same!");
@ -1069,20 +1201,25 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Blend // Blend
if (!IsOpaque() && rt) { if (!IsOpaque() && rt)
{
EmulateBlending(DATE_GL42, DATE_GL45); EmulateBlending(DATE_GL42, DATE_GL45);
} else { }
else
{
dev->OMSetBlendState(); // No blending please dev->OMSetBlendState(); // No blending please
} }
if (m_ps_sel.dfmt == 1) { if (m_ps_sel.dfmt == 1)
{
// Disable writing of the alpha channel // Disable writing of the alpha channel
m_om_csel.wa = 0; m_om_csel.wa = 0;
} }
// DATE setup, no DATE_GL45 please // DATE setup, no DATE_GL45 please
if (DATE && !DATE_GL45) { if (DATE && !DATE_GL45)
{
GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize); GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize);
// Reduce the quantity of clean function // Reduce the quantity of clean function
@ -1091,11 +1228,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Must be done here to avoid any GL state perturbation (clear function...) // Must be done here to avoid any GL state perturbation (clear function...)
// Create an r32ui image that will contain the primitive ID // Create an r32ui image that will contain the primitive ID
if (DATE_GL42) { if (DATE_GL42)
{
dev->InitPrimDateTexture(rt, dRect); dev->InitPrimDateTexture(rt, dRect);
} else if (DATE_one) { }
else if (DATE_one)
{
dev->ClearStencil(ds, 1); dev->ClearStencil(ds, 1);
} else { }
else
{
GSVector4 src = GSVector4(dRect) / GSVector4(rtsize.x, rtsize.y).xyxy(); GSVector4 src = GSVector4(dRect) / GSVector4(rtsize.x, rtsize.y).xyxy();
GSVector4 dst = src * 2.0f - 1.0f; GSVector4 dst = src * 2.0f - 1.0f;
@ -1147,14 +1289,19 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// GS_SPRITE_CLASS are already flat (either by CPU or the GS) // GS_SPRITE_CLASS are already flat (either by CPU or the GS)
m_ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP; m_ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP;
if (DATE_GL45) { if (DATE_GL45)
{
m_ps_sel.date = 5 + m_context->TEST.DATM; m_ps_sel.date = 5 + m_context->TEST.DATM;
} else if (DATE_one) { }
else if (DATE_one)
{
m_require_one_barrier = true; m_require_one_barrier = true;
m_ps_sel.date = 5 + m_context->TEST.DATM; m_ps_sel.date = 5 + m_context->TEST.DATM;
m_om_dssel.date = 1; m_om_dssel.date = 1;
m_om_dssel.date_one = 1; m_om_dssel.date_one = 1;
} else if (DATE) { }
else if (DATE)
{
if (DATE_GL42) if (DATE_GL42)
m_ps_sel.date = 1 + m_context->TEST.DATM; m_ps_sel.date = 1 + m_context->TEST.DATM;
else else
@ -1191,7 +1338,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
bool ate_RGBA_then_Z = false; bool ate_RGBA_then_Z = false;
bool ate_RGB_then_ZA = false; bool ate_RGB_then_ZA = false;
uint8 ps_atst = 0; uint8 ps_atst = 0;
if (ate_first_pass & ate_second_pass) { if (ate_first_pass & ate_second_pass)
{
GL_DBG("Complex Alpha Test"); GL_DBG("Complex Alpha Test");
const bool commutative_depth = (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z) || (m_om_dssel.ztst == ZTST_ALWAYS); const bool commutative_depth = (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z) || (m_om_dssel.ztst == ZTST_ALWAYS);
const bool commutative_alpha = (m_context->ALPHA.C != 1); // when either Alpha Src or a constant const bool commutative_alpha = (m_context->ALPHA.C != 1); // when either Alpha Src or a constant
@ -1200,38 +1348,48 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
ate_RGB_then_ZA = (m_context->TEST.AFAIL == AFAIL_RGB_ONLY) & commutative_depth & commutative_alpha; ate_RGB_then_ZA = (m_context->TEST.AFAIL == AFAIL_RGB_ONLY) & commutative_depth & commutative_alpha;
} }
if (ate_RGBA_then_Z) { if (ate_RGBA_then_Z)
{
GL_DBG("Alternate ATE handling: ate_RGBA_then_Z"); GL_DBG("Alternate ATE handling: ate_RGBA_then_Z");
// Render all color but don't update depth // Render all color but don't update depth
// ATE is disabled here // ATE is disabled here
m_om_dssel.zwe = false; m_om_dssel.zwe = false;
} else if (ate_RGB_then_ZA) { }
else if (ate_RGB_then_ZA)
{
GL_DBG("Alternate ATE handling: ate_RGB_then_ZA"); GL_DBG("Alternate ATE handling: ate_RGB_then_ZA");
// Render RGB color but don't update depth/alpha // Render RGB color but don't update depth/alpha
// ATE is disabled here // ATE is disabled here
m_om_dssel.zwe = false; m_om_dssel.zwe = false;
m_om_csel.wa = false; m_om_csel.wa = false;
} else { }
else
{
EmulateAtst(ps_cb.FogColor_AREF, ps_atst, false); EmulateAtst(ps_cb.FogColor_AREF, ps_atst, false);
m_ps_sel.atst = ps_atst; m_ps_sel.atst = ps_atst;
} }
if (tex) { if (tex)
{
EmulateTextureSampler(tex); EmulateTextureSampler(tex);
} else { }
else
{
m_ps_sel.tfx = 4; m_ps_sel.tfx = 4;
} }
// Always bind the RT. This way special effect can use it. // Always bind the RT. This way special effect can use it.
dev->PSSetShaderResource(3, rt); dev->PSSetShaderResource(3, rt);
if (m_game.title == CRC::ICO) { if (m_game.title == CRC::ICO)
{
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
const GSVideoMode mode = GetVideoMode(); const GSVideoMode mode = GetVideoMode();
if (tex && m_vt.m_primclass == GS_SPRITE_CLASS && m_vertex.next == 2 && PRIM->ABE && // Blend texture if (tex && m_vt.m_primclass == GS_SPRITE_CLASS && m_vertex.next == 2 && PRIM->ABE && // Blend texture
((v[1].U == 8200 && v[1].V == 7176 && mode == GSVideoMode::NTSC) || // at display resolution 512x448 ((v[1].U == 8200 && v[1].V == 7176 && mode == GSVideoMode::NTSC) || // at display resolution 512x448
(v[1].U == 8200 && v[1].V == 8200 && mode == GSVideoMode::PAL)) && // at display resolution 512x512 (v[1].U == 8200 && v[1].V == 8200 && mode == GSVideoMode::PAL)) && // at display resolution 512x512
tex->m_TEX0.PSM == PSM_PSMT8H) { // i.e. read the alpha channel of a 32 bits texture tex->m_TEX0.PSM == PSM_PSMT8H) // i.e. read the alpha channel of a 32 bits texture
{
// Note potentially we can limit to TBP0:0x2800 // Note potentially we can limit to TBP0:0x2800
// Depth buffer was moved so GSdx will invalidate it which means a // Depth buffer was moved so GSdx will invalidate it which means a
@ -1248,7 +1406,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->PSSetShaderResource(4, ds); dev->PSSetShaderResource(4, ds);
// We need the palette to convert the depth to the correct alpha value. // We need the palette to convert the depth to the correct alpha value.
if (!tex->m_palette) { if (!tex->m_palette)
{
uint16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal; uint16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal;
m_tc->AttachPaletteToSource(tex, pal, true); m_tc->AttachPaletteToSource(tex, pal, true);
dev->PSSetShaderResource(1, tex->m_palette); dev->PSSetShaderResource(1, tex->m_palette);
@ -1277,7 +1436,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
if (ds) if (ds)
ds->CommitRegion(GSVector2i(commitRect.z, commitRect.w)); ds->CommitRegion(GSVector2i(commitRect.z, commitRect.w));
if (DATE_GL42) { if (DATE_GL42)
{
GL_PUSH("Date GL42"); GL_PUSH("Date GL42");
// It could be good idea to use stencil in the same time. // It could be good idea to use stencil in the same time.
// Early stencil test will reduce the number of atomic-load operation // Early stencil test will reduce the number of atomic-load operation
@ -1306,13 +1466,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); dev->Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
} }
if (m_ps_sel.hdr) { if (m_ps_sel.hdr)
{
hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA32F); hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA32F);
dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false); dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false);
dev->OMSetRenderTargets(hdr_rt, ds, &scissor); dev->OMSetRenderTargets(hdr_rt, ds, &scissor);
} else { }
else
{
dev->OMSetRenderTargets(rt, ds, &scissor); dev->OMSetRenderTargets(rt, ds, &scissor);
} }
@ -1362,10 +1525,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Depth test should be disabled when depth writes are masked and similarly, Alpha test must be disabled // Depth test should be disabled when depth writes are masked and similarly, Alpha test must be disabled
// when writes to all of the alpha bits in the Framebuffer are masked. // when writes to all of the alpha bits in the Framebuffer are masked.
if (ate_RGBA_then_Z) { if (ate_RGBA_then_Z)
{
z = !m_context->ZBUF.ZMSK; z = !m_context->ZBUF.ZMSK;
r = g = b = a = false; r = g = b = a = false;
} else if (ate_RGB_then_ZA) { }
else if (ate_RGB_then_ZA)
{
z = !m_context->ZBUF.ZMSK; z = !m_context->ZBUF.ZMSK;
a = (m_context->FRAME.FBMSK & 0xFF000000) != 0xFF000000; a = (m_context->FRAME.FBMSK & 0xFF000000) != 0xFF000000;
r = g = b = false; r = g = b = false;
@ -1386,7 +1552,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
} }
} }
if (DATE_GL42) { if (DATE_GL42)
{
dev->RecycleDateTexture(); dev->RecycleDateTexture();
} }
@ -1394,7 +1561,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Warning: EndScene must be called before StretchRect otherwise // Warning: EndScene must be called before StretchRect otherwise
// vertices will be overwritten. Trust me you don't want to do that. // vertices will be overwritten. Trust me you don't want to do that.
if (hdr_rt) { if (hdr_rt)
{
GSVector4 dRect(ComputeBoundingBox(rtscale, rtsize)); GSVector4 dRect(ComputeBoundingBox(rtscale, rtsize));
GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
dev->StretchRect(hdr_rt, sRect, rt, dRect, ShaderConvert_MOD_256, false); dev->StretchRect(hdr_rt, sRect, rt, dRect, ShaderConvert_MOD_256, false);

View File

@ -27,13 +27,15 @@
class GSRendererOGL final : public GSRendererHW class GSRendererOGL final : public GSRendererHW
{ {
enum PRIM_OVERLAP { enum PRIM_OVERLAP
{
PRIM_OVERLAP_UNKNOW, PRIM_OVERLAP_UNKNOW,
PRIM_OVERLAP_YES, PRIM_OVERLAP_YES,
PRIM_OVERLAP_NO PRIM_OVERLAP_NO
}; };
enum ACC_BLEND { enum ACC_BLEND
{
ACC_BLEND_NONE = 0, ACC_BLEND_NONE = 0,
ACC_BLEND_BASIC = 1, ACC_BLEND_BASIC = 1,
ACC_BLEND_MEDIUM = 2, ACC_BLEND_MEDIUM = 2,
@ -73,7 +75,7 @@ class GSRendererOGL final : public GSRendererHW
public: public:
GSRendererOGL(); GSRendererOGL();
virtual ~GSRendererOGL() {}; virtual ~GSRendererOGL() {}
bool CreateDevice(GSDevice* dev); bool CreateDevice(GSDevice* dev);

View File

@ -29,9 +29,9 @@
#include "GSdxResources.h" #include "GSdxResources.h"
#endif #endif
GSShaderOGL::GSShaderOGL(bool debug) : GSShaderOGL::GSShaderOGL(bool debug)
m_pipeline(0), : m_pipeline(0)
m_debug_shader(debug) , m_debug_shader(debug)
{ {
theApp.LoadResource(IDR_COMMON_GLSL, m_common_header); theApp.LoadResource(IDR_COMMON_GLSL, m_common_header);
@ -45,8 +45,10 @@ GSShaderOGL::~GSShaderOGL()
printf("Delete %zu Shaders, %zu Programs, %zu Pipelines\n", printf("Delete %zu Shaders, %zu Programs, %zu Pipelines\n",
m_shad_to_delete.size(), m_prog_to_delete.size(), m_pipe_to_delete.size()); m_shad_to_delete.size(), m_prog_to_delete.size(), m_pipe_to_delete.size());
for (auto s : m_shad_to_delete) glDeleteShader(s); for (auto s : m_shad_to_delete)
for (auto p : m_prog_to_delete) glDeleteProgram(p); glDeleteShader(s);
for (auto p : m_prog_to_delete)
glDeleteProgram(p);
glDeleteProgramPipelines(m_pipe_to_delete.size(), &m_pipe_to_delete[0]); glDeleteProgramPipelines(m_pipe_to_delete.size(), &m_pipe_to_delete[0]);
} }
@ -91,7 +93,8 @@ void GSShaderOGL::BindProgram(GLuint vs, GLuint gs, GLuint ps)
{ {
GLuint p = LinkProgram(vs, gs, ps); GLuint p = LinkProgram(vs, gs, ps);
if (GLState::program != p) { if (GLState::program != p)
{
GLState::program = p; GLState::program = p;
glUseProgram(p); glUseProgram(p);
} }
@ -99,7 +102,8 @@ void GSShaderOGL::BindProgram(GLuint vs, GLuint gs, GLuint ps)
void GSShaderOGL::BindProgram(GLuint p) void GSShaderOGL::BindProgram(GLuint p)
{ {
if (GLState::program != p) { if (GLState::program != p)
{
GLState::program = p; GLState::program = p;
glUseProgram(p); glUseProgram(p);
} }
@ -109,12 +113,14 @@ void GSShaderOGL::BindPipeline(GLuint vs, GLuint gs, GLuint ps)
{ {
BindPipeline(m_pipeline); BindPipeline(m_pipeline);
if (GLState::vs != vs) { if (GLState::vs != vs)
{
GLState::vs = vs; GLState::vs = vs;
glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, vs); glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, vs);
} }
if (GLState::gs != gs) { if (GLState::gs != gs)
{
GLState::gs = gs; GLState::gs = gs;
glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, gs); glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, gs);
} }
@ -133,12 +139,14 @@ void GSShaderOGL::BindPipeline(GLuint vs, GLuint gs, GLuint ps)
void GSShaderOGL::BindPipeline(GLuint pipe) void GSShaderOGL::BindPipeline(GLuint pipe)
{ {
if (GLState::pipeline != pipe) { if (GLState::pipeline != pipe)
{
GLState::pipeline = pipe; GLState::pipeline = pipe;
glBindProgramPipeline(pipe); glBindProgramPipeline(pipe);
} }
if (GLState::program) { if (GLState::program)
{
GLState::program = 0; GLState::program = 0;
glUseProgram(0); glUseProgram(0);
} }
@ -146,15 +154,18 @@ void GSShaderOGL::BindPipeline(GLuint pipe)
bool GSShaderOGL::ValidateShader(GLuint s) bool GSShaderOGL::ValidateShader(GLuint s)
{ {
if (!m_debug_shader) return true; if (!m_debug_shader)
return true;
GLint status = 0; GLint status = 0;
glGetShaderiv(s, GL_COMPILE_STATUS, &status); glGetShaderiv(s, GL_COMPILE_STATUS, &status);
if (status) return true; if (status)
return true;
GLint log_length = 0; GLint log_length = 0;
glGetShaderiv(s, GL_INFO_LOG_LENGTH, &log_length); glGetShaderiv(s, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) { if (log_length > 0)
{
char* log = new char[log_length]; char* log = new char[log_length];
glGetShaderInfoLog(s, log_length, NULL, log); glGetShaderInfoLog(s, log_length, NULL, log);
fprintf(stderr, "%s", log); fprintf(stderr, "%s", log);
@ -167,15 +178,18 @@ bool GSShaderOGL::ValidateShader(GLuint s)
bool GSShaderOGL::ValidateProgram(GLuint p) bool GSShaderOGL::ValidateProgram(GLuint p)
{ {
if (!m_debug_shader) return true; if (!m_debug_shader)
return true;
GLint status = 0; GLint status = 0;
glGetProgramiv(p, GL_LINK_STATUS, &status); glGetProgramiv(p, GL_LINK_STATUS, &status);
if (status) return true; if (status)
return true;
GLint log_length = 0; GLint log_length = 0;
glGetProgramiv(p, GL_INFO_LOG_LENGTH, &log_length); glGetProgramiv(p, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) { if (log_length > 0)
{
char* log = new char[log_length]; char* log = new char[log_length];
glGetProgramInfoLog(p, log_length, NULL, log); glGetProgramInfoLog(p, log_length, NULL, log);
fprintf(stderr, "%s", log); fprintf(stderr, "%s", log);
@ -188,18 +202,21 @@ bool GSShaderOGL::ValidateProgram(GLuint p)
bool GSShaderOGL::ValidatePipeline(GLuint p) bool GSShaderOGL::ValidatePipeline(GLuint p)
{ {
if (!m_debug_shader) return true; if (!m_debug_shader)
return true;
// FIXME: might be mandatory to validate the pipeline // FIXME: might be mandatory to validate the pipeline
glValidateProgramPipeline(p); glValidateProgramPipeline(p);
GLint status = 0; GLint status = 0;
glGetProgramPipelineiv(p, GL_VALIDATE_STATUS, &status); glGetProgramPipelineiv(p, GL_VALIDATE_STATUS, &status);
if (status) return true; if (status)
return true;
GLint log_length = 0; GLint log_length = 0;
glGetProgramPipelineiv(p, GL_INFO_LOG_LENGTH, &log_length); glGetProgramPipelineiv(p, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) { if (log_length > 0)
{
char* log = new char[log_length]; char* log = new char[log_length];
glGetProgramPipelineInfoLog(p, log_length, NULL, log); glGetProgramPipelineInfoLog(p, log_length, NULL, log);
fprintf(stderr, "%s", log); fprintf(stderr, "%s", log);
@ -218,10 +235,13 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
header += "#extension GL_ARB_shading_language_420pack: require\n"; header += "#extension GL_ARB_shading_language_420pack: require\n";
// Need GL version 410 // Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n"; header += "#extension GL_ARB_separate_shader_objects: require\n";
if (GLLoader::found_GL_ARB_shader_image_load_store) { if (GLLoader::found_GL_ARB_shader_image_load_store)
{
// Need GL version 420 // Need GL version 420
header += "#extension GL_ARB_shader_image_load_store: require\n"; header += "#extension GL_ARB_shader_image_load_store: require\n";
} else { }
else
{
header += "#define DISABLE_GL42_image\n"; header += "#define DISABLE_GL42_image\n";
} }
if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel) if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
@ -234,7 +254,8 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
header += "#define pGL_ES 0\n"; header += "#define pGL_ES 0\n";
// Allow to puts several shader in 1 files // Allow to puts several shader in 1 files
switch (type) { switch (type)
{
case GL_VERTEX_SHADER: case GL_VERTEX_SHADER:
header += "#define VERTEX_SHADER 1\n"; header += "#define VERTEX_SHADER 1\n";
break; break;
@ -244,7 +265,8 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
case GL_FRAGMENT_SHADER: case GL_FRAGMENT_SHADER:
header += "#define FRAGMENT_SHADER 1\n"; header += "#define FRAGMENT_SHADER 1\n";
break; break;
default: ASSERT(0); default:
ASSERT(0);
} }
// Select the entry point ie the main function // Select the entry point ie the main function
@ -276,7 +298,8 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
bool status = ValidateProgram(program); bool status = ValidateProgram(program);
if (!status) { if (!status)
{
// print extra info // print extra info
fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), program); fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), program);
fprintf(stderr, "\n%s", macro_sel.c_str()); fprintf(stderr, "\n%s", macro_sel.c_str());
@ -312,7 +335,8 @@ GLuint GSShaderOGL::CompileShader(const std::string& glsl_file, const std::strin
bool status = ValidateShader(shader); bool status = ValidateShader(shader);
if (!status) { if (!status)
{
// print extra info // print extra info
fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), shader); fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), shader);
fprintf(stderr, "\n%s", macro_sel.c_str()); fprintf(stderr, "\n%s", macro_sel.c_str());
@ -331,7 +355,8 @@ GLuint GSShaderOGL::CompileShader(const std::string& glsl_file, const std::strin
// GLSL improvement (unfortunately). // GLSL improvement (unfortunately).
int GSShaderOGL::DumpAsm(const std::string& file, GLuint p) int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
{ {
if (!GLLoader::vendor_id_nvidia) return 0; if (!GLLoader::vendor_id_nvidia)
return 0;
GLint binaryLength; GLint binaryLength;
glGetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength); glGetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
@ -345,18 +370,24 @@ int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
// Search the magic number "!!" // Search the magic number "!!"
int asm_ = 0; int asm_ = 0;
while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) { while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_ + 1] != '!'))
{
asm_ += 1; asm_ += 1;
} }
int instructions = -1; int instructions = -1;
if (asm_ < binaryLength) { if (asm_ < binaryLength)
{
// Now print asm as text // Now print asm as text
char* asm_txt = strtok(&binary[asm_], "\n"); char* asm_txt = strtok(&binary[asm_], "\n");
while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) { while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5)))
if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4)) { {
if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4))
{
instructions = 0; instructions = 0;
} else if (instructions >= 0) { }
else if (instructions >= 0)
{
if (instructions == 0) if (instructions == 0)
fprintf(outfile, "\n"); fprintf(outfile, "\n");
instructions++; instructions++;
@ -369,7 +400,8 @@ int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
} }
fclose(outfile); fclose(outfile);
if (instructions < 0) { if (instructions < 0)
{
// RAW dump in case of error // RAW dump in case of error
fprintf(stderr, "Error: failed to find the number of instructions!\n"); fprintf(stderr, "Error: failed to find the number of instructions!\n");
outfile = fopen(file.c_str(), "wb"); outfile = fopen(file.c_str(), "wb");

View File

@ -21,7 +21,8 @@
#pragma once #pragma once
class GSShaderOGL { class GSShaderOGL
{
GLuint m_pipeline; GLuint m_pipeline;
std::unordered_map<uint32, GLuint> m_program; std::unordered_map<uint32, GLuint> m_program;
const bool m_debug_shader; const bool m_debug_shader;

View File

@ -123,13 +123,15 @@ void GSTextureCacheOGL::Read(Source* t, const GSVector4i& r)
// FIXME Create a get function to avoid the useless copy // FIXME Create a get function to avoid the useless copy
// Note: With openGL 4.5 you can use glGetTextureSubImage // Note: With openGL 4.5 you can use glGetTextureSubImage
if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) { if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height()))
{
m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r); m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r);
GSTexture::GSMap m; GSTexture::GSMap m;
GSVector4i r_offscreen(0, 0, r.width(), r.height()); GSVector4i r_offscreen(0, 0, r.width(), r.height());
if (offscreen->Map(m, &r_offscreen)) { if (offscreen->Map(m, &r_offscreen))
{
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -30,7 +30,8 @@ extern uint64 g_real_texture_upload_byte;
#endif #endif
// FIXME OGL4: investigate, only 1 unpack buffer always bound // FIXME OGL4: investigate, only 1 unpack buffer always bound
namespace PboPool { namespace PboPool
{
const uint32 m_pbo_size = 64 * 1024 * 1024; const uint32 m_pbo_size = 64 * 1024 * 1024;
const uint32 m_seg_size = 16 * 1024 * 1024; const uint32 m_seg_size = 16 * 1024 * 1024;
@ -49,7 +50,8 @@ namespace PboPool {
const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT; const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT; const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
void Init() { void Init()
{
glGenBuffers(1, &m_buffer); glGenBuffers(1, &m_buffer);
BindPbo(); BindPbo();
@ -60,19 +62,22 @@ namespace PboPool {
m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags); m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags);
m_offset = 0; m_offset = 0;
for (size_t i = 0; i < countof(m_fence); i++) { for (size_t i = 0; i < countof(m_fence); i++)
{
m_fence[i] = 0; m_fence[i] = 0;
} }
UnbindPbo(); UnbindPbo();
} }
char* Map(uint32 size) { char* Map(uint32 size)
{
char* map; char* map;
// Note: keep offset aligned for SSE/AVX // Note: keep offset aligned for SSE/AVX
m_size = (size + 63) & ~0x3F; m_size = (size + 63) & ~0x3F;
if (m_size > m_pbo_size) { if (m_size > m_pbo_size)
{
fprintf(stderr, "BUG: PBO too small %u but need %u\n", m_pbo_size, m_size); fprintf(stderr, "BUG: PBO too small %u but need %u\n", m_pbo_size, m_size);
} }
@ -87,41 +92,50 @@ namespace PboPool {
return map; return map;
} }
void Unmap() { void Unmap()
{
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset, m_size); glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset, m_size);
} }
uptr Offset() { uptr Offset()
{
return m_offset; return m_offset;
} }
void Destroy() { void Destroy()
{
m_map = NULL; m_map = NULL;
m_offset = 0; m_offset = 0;
for (size_t i = 0; i < countof(m_fence); i++) { for (size_t i = 0; i < countof(m_fence); i++)
{
glDeleteSync(m_fence[i]); glDeleteSync(m_fence[i]);
} }
glDeleteBuffers(1, &m_buffer); glDeleteBuffers(1, &m_buffer);
} }
void BindPbo() { void BindPbo()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer);
} }
void Sync() { void Sync()
{
uint32 segment_current = m_offset / m_seg_size; uint32 segment_current = m_offset / m_seg_size;
uint32 segment_next = (m_offset + m_size) / m_seg_size; uint32 segment_next = (m_offset + m_size) / m_seg_size;
if (segment_current != segment_next) { if (segment_current != segment_next)
if (segment_next >= countof(m_fence)) { {
if (segment_next >= countof(m_fence))
{
segment_next = 0; segment_next = 0;
} }
// Align current transfer on the start of the segment // Align current transfer on the start of the segment
m_offset = m_seg_size * segment_next; m_offset = m_seg_size * segment_next;
if (m_size > m_seg_size) { if (m_size > m_seg_size)
{
fprintf(stderr, "BUG: PBO Map size %u is bigger than a single segment %u. Crossing more than one fence is not supported yet, texture data may be corrupted.\n", m_size, m_seg_size); fprintf(stderr, "BUG: PBO Map size %u is bigger than a single segment %u. Crossing more than one fence is not supported yet, texture data may be corrupted.\n", m_size, m_seg_size);
// TODO Synchronize all crossed fences // TODO Synchronize all crossed fences
} }
@ -130,10 +144,12 @@ namespace PboPool {
m_fence[segment_current] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); m_fence[segment_current] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
// Check next segment is free // Check next segment is free
if (m_fence[segment_next]) { if (m_fence[segment_next])
{
GLenum status = glClientWaitSync(m_fence[segment_next], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); GLenum status = glClientWaitSync(m_fence[segment_next], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
// Potentially it doesn't work on AMD driver which might always return GL_CONDITION_SATISFIED // Potentially it doesn't work on AMD driver which might always return GL_CONDITION_SATISFIED
if (status != GL_ALREADY_SIGNALED) { if (status != GL_ALREADY_SIGNALED)
{
GL_PERF("GL_PIXEL_UNPACK_BUFFER: Sync Sync (%x)! Buffer too small ?", status); GL_PERF("GL_PIXEL_UNPACK_BUFFER: Sync Sync (%x)! Buffer too small ?", status);
} }
@ -143,14 +159,16 @@ namespace PboPool {
} }
} }
void UnbindPbo() { void UnbindPbo()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
} }
void EndTransfer() { void EndTransfer()
{
m_offset += m_size; m_offset += m_size;
} }
} } // namespace PboPool
GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap) GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap)
: m_clean(false), m_generate_mipmap(true), m_local_buffer(nullptr), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0), m_layer(0) : m_clean(false), m_generate_mipmap(true), m_local_buffer(nullptr), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0), m_layer(0)
@ -166,7 +184,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
m_max_layer = 1; m_max_layer = 1;
// Bunch of constant parameter // Bunch of constant parameter
switch (m_format) { switch (m_format)
{
// 1 Channel integer // 1 Channel integer
case GL_R32UI: case GL_R32UI:
case GL_R32I: case GL_R32I:
@ -240,7 +259,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
ASSERT(0); ASSERT(0);
} }
switch (m_type) { switch (m_type)
{
case GSTexture::Backbuffer: case GSTexture::Backbuffer:
return; // backbuffer isn't a real texture return; // backbuffer isn't a real texture
case GSTexture::Offscreen: case GSTexture::Offscreen:
@ -259,7 +279,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
break; break;
} }
switch (m_format) { switch (m_format)
{
case GL_R16UI: case GL_R16UI:
case GL_R8: case GL_R8:
m_sparse &= GLLoader::found_compatible_GL_ARB_sparse_texture2; m_sparse &= GLLoader::found_compatible_GL_ARB_sparse_texture2;
@ -294,21 +315,26 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
// Create a gl object (texture isn't allocated here) // Create a gl object (texture isn't allocated here)
glCreateTextures(GL_TEXTURE_2D, 1, &m_texture_id); glCreateTextures(GL_TEXTURE_2D, 1, &m_texture_id);
if (m_format == GL_R8) { if (m_format == GL_R8)
{
// Emulate DX behavior, beside it avoid special code in shader to differentiate // Emulate DX behavior, beside it avoid special code in shader to differentiate
// palette texture from a GL_RGBA target or a GL_R texture. // palette texture from a GL_RGBA target or a GL_R texture.
glTextureParameteri(m_texture_id, GL_TEXTURE_SWIZZLE_A, GL_RED); glTextureParameteri(m_texture_id, GL_TEXTURE_SWIZZLE_A, GL_RED);
} }
if (m_sparse) { if (m_sparse)
{
GSVector2i old_size = m_size; GSVector2i old_size = m_size;
m_size = RoundUpPage(m_size); m_size = RoundUpPage(m_size);
if (m_size != old_size) { if (m_size != old_size)
{
fprintf(stderr, "Sparse texture size (%dx%d) isn't a multiple of gpu page size (%dx%d)\n", fprintf(stderr, "Sparse texture size (%dx%d) isn't a multiple of gpu page size (%dx%d)\n",
old_size.x, old_size.y, m_gpu_page_size.x, m_gpu_page_size.y); old_size.x, old_size.y, m_gpu_page_size.x, m_gpu_page_size.y);
} }
glTextureParameteri(m_texture_id, GL_TEXTURE_SPARSE_ARB, true); glTextureParameteri(m_texture_id, GL_TEXTURE_SPARSE_ARB, true);
} else { }
else
{
m_committed_size = m_size; m_committed_size = m_size;
} }
@ -316,7 +342,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
static int every_512 = 0; static int every_512 = 0;
GLState::available_vram -= m_mem_usage; GLState::available_vram -= m_mem_usage;
if ((GLState::available_vram < 0) && (every_512 % 512 == 0)) { if ((GLState::available_vram < 0) && (every_512 % 512 == 0))
{
fprintf(stderr, "Available VRAM is very low (%lld), a crash is expected! Enable conservative buffer allocation or reduce upscaling!\n", GLState::available_vram); fprintf(stderr, "Available VRAM is very low (%lld), a crash is expected! Enable conservative buffer allocation or reduce upscaling!\n", GLState::available_vram);
every_512++; every_512++;
// Pull emergency break // Pull emergency break
@ -334,7 +361,8 @@ GSTextureOGL::~GSTextureOGL()
GLState::rt = 0; GLState::rt = 0;
if (m_texture_id == GLState::ds) if (m_texture_id == GLState::ds)
GLState::ds = 0; GLState::ds = 0;
for (size_t i = 0; i < countof(GLState::tex_unit); i++) { for (size_t i = 0; i < countof(GLState::tex_unit); i++)
{
if (m_texture_id == GLState::tex_unit[i]) if (m_texture_id == GLState::tex_unit[i])
GLState::tex_unit[i] = 0; GLState::tex_unit[i] = 0;
} }
@ -406,7 +434,8 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
// PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch
// Note: row_byte != pitch // Note: row_byte != pitch
for (int h = 0; h < r.height(); h++) { for (int h = 0; h < r.height(); h++)
{
memcpy(map, src, row_byte); memcpy(map, src, row_byte);
map += row_byte; map += row_byte;
src += pitch; src += pitch;
@ -440,7 +469,8 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
uint32 row_byte = r.width() << m_int_shift; uint32 row_byte = r.width() << m_int_shift;
m.pitch = row_byte; m.pitch = row_byte;
if (m_type == GSTexture::Offscreen) { if (m_type == GSTexture::Offscreen)
{
// The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx // The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx
// architecture is waiting the data right now. // architecture is waiting the data right now.
@ -467,7 +497,9 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
m.bits = m_local_buffer; m.bits = m_local_buffer;
return true; return true;
} else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { }
else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget)
{
GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap
m_clean = false; m_clean = false;
@ -495,7 +527,8 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
void GSTextureOGL::Unmap() void GSTextureOGL::Unmap()
{ {
if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget)
{
PboPool::Unmap(); PboPool::Unmap();
@ -514,7 +547,8 @@ void GSTextureOGL::Unmap()
void GSTextureOGL::GenerateMipmap() void GSTextureOGL::GenerateMipmap()
{ {
if (m_generate_mipmap && m_max_layer > 1) { if (m_generate_mipmap && m_max_layer > 1)
{
glGenerateTextureMipmap(m_texture_id); glGenerateTextureMipmap(m_texture_id);
m_generate_mipmap = false; m_generate_mipmap = false;
} }
@ -524,12 +558,16 @@ void GSTextureOGL::CommitPages(const GSVector2i& region, bool commit)
{ {
GLState::available_vram += m_mem_usage; GLState::available_vram += m_mem_usage;
if (commit) { if (commit)
if (m_committed_size.x == 0) { {
if (m_committed_size.x == 0)
{
// Nothing allocated so far // Nothing allocated so far
GL_INS("CommitPages initial %dx%d of %u", region.x, region.y, m_texture_id); GL_INS("CommitPages initial %dx%d of %u", region.x, region.y, m_texture_id);
glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, 0, 0, region.x, region.y, 1, commit); glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, 0, 0, region.x, region.y, 1, commit);
} else { }
else
{
GL_INS("CommitPages extend %dx%d to %dx%d of %u", m_committed_size.x, m_committed_size.y, region.x, region.y, m_texture_id); GL_INS("CommitPages extend %dx%d to %dx%d of %u", m_committed_size.x, m_committed_size.y, region.x, region.y, m_texture_id);
int w = region.x - m_committed_size.x; int w = region.x - m_committed_size.x;
int h = region.y - m_committed_size.y; int h = region.y - m_committed_size.y;
@ -539,8 +577,9 @@ void GSTextureOGL::CommitPages(const GSVector2i& region, bool commit)
glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, m_committed_size.y, 0, region.x, h, 1, commit); glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, m_committed_size.y, 0, region.x, h, 1, commit);
} }
m_committed_size = region; m_committed_size = region;
}
} else { else
{
// Release everything // Release everything
GL_INS("CommitPages release of %u", m_texture_id); GL_INS("CommitPages release of %u", m_texture_id);
@ -565,9 +604,12 @@ bool GSTextureOGL::Save(const std::string& fn)
GSPng::Format fmt = GSPng::RGB_PNG; GSPng::Format fmt = GSPng::RGB_PNG;
#endif #endif
if (IsBackbuffer()) { if (IsBackbuffer())
{
glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get()); glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get());
} else if(IsDss()) { }
else if (IsDss())
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_texture_id, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_texture_id, 0);
@ -576,18 +618,23 @@ bool GSTextureOGL::Save(const std::string& fn)
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
fmt = GSPng::RGB_A_PNG; fmt = GSPng::RGB_A_PNG;
} else if(m_format == GL_R32I) { }
else if (m_format == GL_R32I)
{
// Note: 4.5 function used for accurate DATE // Note: 4.5 function used for accurate DATE
// barely used outside of dev and not sparse anyway // barely used outside of dev and not sparse anyway
glGetTextureImage(m_texture_id, 0, GL_RED_INTEGER, GL_INT, buf_size, image.get()); glGetTextureImage(m_texture_id, 0, GL_RED_INTEGER, GL_INT, buf_size, image.get());
fmt = GSPng::R32I_PNG; fmt = GSPng::R32I_PNG;
} else { }
else
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0);
if (m_format == GL_RGBA8) { if (m_format == GL_RGBA8)
{
glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get()); glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get());
} }
else if (m_format == GL_R16UI) else if (m_format == GL_R16UI)

View File

@ -23,7 +23,8 @@
#include "Renderers/Common/GSTexture.h" #include "Renderers/Common/GSTexture.h"
namespace PboPool { namespace PboPool
{
inline void BindPbo(); inline void BindPbo();
inline void UnbindPbo(); inline void UnbindPbo();
inline void Sync(); inline void Sync();
@ -35,7 +36,7 @@ namespace PboPool {
void Init(); void Init();
void Destroy(); void Destroy();
} } // namespace PboPool
class GSTextureOGL final : public GSTexture class GSTextureOGL final : public GSTexture
{ {

View File

@ -28,7 +28,8 @@ extern uint64 g_uniform_upload_byte;
#endif #endif
class GSUniformBufferOGL { class GSUniformBufferOGL
{
GLuint m_buffer; // data object GLuint m_buffer; // data object
GLuint m_index; // GLSL slot GLuint m_index; // GLSL slot
uint32 m_size; // size of the data uint32 m_size; // size of the data
@ -49,7 +50,8 @@ public:
void bind() void bind()
{ {
if (GLState::ubo != m_buffer) { if (GLState::ubo != m_buffer)
{
GLState::ubo = m_buffer; GLState::ubo = m_buffer;
glBindBuffer(GL_UNIFORM_BUFFER, m_buffer); glBindBuffer(GL_UNIFORM_BUFFER, m_buffer);
} }
@ -82,7 +84,8 @@ public:
void cache_upload(const void* src) void cache_upload(const void* src)
{ {
if (memcmp(m_cache, src, m_size) != 0) { if (memcmp(m_cache, src, m_size) != 0)
{
memcpy(m_cache, src, m_size); memcpy(m_cache, src, m_size);
upload(src); upload(src);
} }
@ -97,7 +100,8 @@ public:
#define UBO_BUFFER_SIZE (4 * 1024 * 1024) #define UBO_BUFFER_SIZE (4 * 1024 * 1024)
class GSUniformBufferStorageOGL { class GSUniformBufferStorageOGL
{
GLuint m_buffer; // data object GLuint m_buffer; // data object
GLuint m_index; // GLSL slot GLuint m_index; // GLSL slot
uint32 m_size; // size of the data uint32 m_size; // size of the data
@ -105,8 +109,8 @@ class GSUniformBufferStorageOGL {
uint32 m_offset; uint32 m_offset;
public: public:
GSUniformBufferStorageOGL(GLuint index, uint32 size) : m_index(index) GSUniformBufferStorageOGL(GLuint index, uint32 size)
, m_size(size), m_offset(0) : m_index(index) , m_size(size) , m_offset(0)
{ {
glGenBuffers(1, &m_buffer); glGenBuffers(1, &m_buffer);
bind(); bind();
@ -116,7 +120,8 @@ public:
void bind() void bind()
{ {
if (GLState::ubo != m_buffer) { if (GLState::ubo != m_buffer)
{
GLState::ubo = m_buffer; GLState::ubo = m_buffer;
glBindBuffer(GL_UNIFORM_BUFFER, m_buffer); glBindBuffer(GL_UNIFORM_BUFFER, m_buffer);
} }
@ -159,7 +164,8 @@ public:
m_offset = 0; m_offset = 0;
} }
~GSUniformBufferStorageOGL() { ~GSUniformBufferStorageOGL()
{
glDeleteBuffers(1, &m_buffer); glDeleteBuffers(1, &m_buffer);
} }
}; };

View File

@ -27,7 +27,8 @@
extern uint64 g_vertex_upload_byte; extern uint64 g_vertex_upload_byte;
#endif #endif
struct GSInputLayoutOGL { struct GSInputLayoutOGL
{
GLint location; GLint location;
GLint size; GLint size;
GLenum type; GLenum type;
@ -37,7 +38,8 @@ struct GSInputLayoutOGL {
}; };
template <int STRIDE> template <int STRIDE>
class GSBufferOGL { class GSBufferOGL
{
size_t m_start; size_t m_start;
size_t m_count; size_t m_count;
size_t m_limit; size_t m_limit;
@ -60,7 +62,8 @@ class GSBufferOGL {
m_limit = 1u << (1u + (size_t)std::log2(count - 1u)); m_limit = 1u << (1u + (size_t)std::log2(count - 1u));
m_quarter_shift = (size_t)std::log2(m_limit * STRIDE) - 2; m_quarter_shift = (size_t)std::log2(m_limit * STRIDE) - 2;
for (size_t i = 0; i < 5; i++) { for (size_t i = 0; i < 5; i++)
{
m_fence[i] = 0; m_fence[i] = 0;
} }
@ -80,14 +83,17 @@ class GSBufferOGL {
glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags); glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags);
m_buffer_ptr = (uint8*)glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags); m_buffer_ptr = (uint8*)glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags);
if (!m_buffer_ptr) { if (!m_buffer_ptr)
{
fprintf(stderr, "Failed to map buffer\n"); fprintf(stderr, "Failed to map buffer\n");
throw GSDXError(); throw GSDXError();
} }
} }
~GSBufferOGL() { ~GSBufferOGL()
for (size_t i = 0; i < 5; i++) { {
for (size_t i = 0; i < 5; i++)
{
glDeleteSync(m_fence[i]); glDeleteSync(m_fence[i]);
} }
glDeleteBuffers(1, &m_buffer_name); glDeleteBuffers(1, &m_buffer_name);
@ -108,14 +114,16 @@ class GSBufferOGL {
size_t offset = m_start * STRIDE; size_t offset = m_start * STRIDE;
size_t length = m_count * STRIDE; size_t length = m_count * STRIDE;
if (m_count > (m_limit - m_start) ) { if (m_count > (m_limit - m_start))
{
size_t current_chunk = offset >> m_quarter_shift; size_t current_chunk = offset >> m_quarter_shift;
#ifdef ENABLE_OGL_DEBUG_FENCE #ifdef ENABLE_OGL_DEBUG_FENCE
fprintf(stderr, "%x: Wrap buffer\n", m_target); fprintf(stderr, "%x: Wrap buffer\n", m_target);
fprintf(stderr, "%x: Insert a fence in chunk %zu\n", m_target, current_chunk); fprintf(stderr, "%x: Insert a fence in chunk %zu\n", m_target, current_chunk);
#endif #endif
ASSERT(current_chunk > 0 && current_chunk < 5); ASSERT(current_chunk > 0 && current_chunk < 5);
if (m_fence[current_chunk] == 0) { if (m_fence[current_chunk] == 0)
{
m_fence[current_chunk] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); m_fence[current_chunk] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
} }
@ -124,10 +132,12 @@ class GSBufferOGL {
offset = 0; offset = 0;
// Only check first chunk // Only check first chunk
if (m_fence[0]) { if (m_fence[0])
{
#ifdef ENABLE_OGL_DEBUG_FENCE #ifdef ENABLE_OGL_DEBUG_FENCE
GLenum status = glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); GLenum status = glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
if (status != GL_ALREADY_SIGNALED) { if (status != GL_ALREADY_SIGNALED)
{
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target); fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
} }
#else #else
@ -141,13 +151,15 @@ class GSBufferOGL {
// Protect buffer with fences // Protect buffer with fences
size_t current_chunk = offset >> m_quarter_shift; size_t current_chunk = offset >> m_quarter_shift;
size_t next_chunk = (offset + length) >> m_quarter_shift; size_t next_chunk = (offset + length) >> m_quarter_shift;
for (size_t c = current_chunk + 1; c <= next_chunk; c++) { for (size_t c = current_chunk + 1; c <= next_chunk; c++)
{
#ifdef ENABLE_OGL_DEBUG_FENCE #ifdef ENABLE_OGL_DEBUG_FENCE
fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c - 1); fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c - 1);
#endif #endif
ASSERT(c > 0 && c < 5); ASSERT(c > 0 && c < 5);
m_fence[c - 1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); m_fence[c - 1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
if (m_fence[c]) { if (m_fence[c])
{
#ifdef ENABLE_OGL_DEBUG_FENCE #ifdef ENABLE_OGL_DEBUG_FENCE
GLenum status = glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); GLenum status = glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
#else #else
@ -157,7 +169,8 @@ class GSBufferOGL {
m_fence[c] = 0; m_fence[c] = 0;
#ifdef ENABLE_OGL_DEBUG_FENCE #ifdef ENABLE_OGL_DEBUG_FENCE
if (status != GL_ALREADY_SIGNALED) { if (status != GL_ALREADY_SIGNALED)
{
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target); fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
} }
#endif #endif
@ -211,10 +224,10 @@ class GSBufferOGL {
} }
size_t GetStart() { return m_start; } size_t GetStart() { return m_start; }
}; };
class GSVertexBufferStateOGL { class GSVertexBufferStateOGL
{
std::unique_ptr<GSBufferOGL<sizeof(GSVertexPT1)>> m_vb; std::unique_ptr<GSBufferOGL<sizeof(GSVertexPT1)>> m_vb;
std::unique_ptr<GSBufferOGL<sizeof(uint32)>> m_ib; std::unique_ptr<GSBufferOGL<sizeof(uint32)>> m_ib;
@ -226,7 +239,8 @@ class GSVertexBufferStateOGL {
GSVertexBufferStateOGL(const GSVertexBufferStateOGL&) = delete; GSVertexBufferStateOGL(const GSVertexBufferStateOGL&) = delete;
public: public:
GSVertexBufferStateOGL(const std::vector<GSInputLayoutOGL>& layout) : m_topology(0), m_layout(layout) GSVertexBufferStateOGL(const std::vector<GSInputLayoutOGL>& layout)
: m_topology(0), m_layout(layout)
{ {
glGenVertexArrays(1, &m_va); glGenVertexArrays(1, &m_va);
glBindVertexArray(m_va); glBindVertexArray(m_va);
@ -250,15 +264,20 @@ public:
void set_internal_format() void set_internal_format()
{ {
for (const auto &l : m_layout) { for (const auto& l : m_layout)
{
// Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer // Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer
glEnableVertexAttribArray(l.location); glEnableVertexAttribArray(l.location);
switch (l.type) { switch (l.type)
{
case GL_UNSIGNED_SHORT: case GL_UNSIGNED_SHORT:
case GL_UNSIGNED_INT: case GL_UNSIGNED_INT:
if (l.normalize) { if (l.normalize)
{
glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset); glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset);
} else { }
else
{
// Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I) // Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I)
glVertexAttribIPointer(l.location, l.size, l.type, l.stride, l.offset); glVertexAttribIPointer(l.location, l.size, l.type, l.stride, l.offset);
} }
@ -286,13 +305,18 @@ public:
void SetTopology(GLenum topology) { m_topology = topology; } void SetTopology(GLenum topology) { m_topology = topology; }
void* MapVB(size_t count) { void* MapVB(size_t count)
{
void* ptr; void* ptr;
while (true) { while (true)
try { {
try
{
ptr = m_vb->map(count); ptr = m_vb->map(count);
break; break;
} catch (GSDXErrorGlVertexArrayTooSmall) { }
catch (GSDXErrorGlVertexArrayTooSmall)
{
GL_INS("GL vertex buffer is too small"); GL_INS("GL vertex buffer is too small");
m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count)); m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count));
@ -304,12 +328,17 @@ public:
return ptr; return ptr;
} }
void UnmapVB() { m_vb->unmap(); } void UnmapVB() { m_vb->unmap(); }
void UploadVB(const void* vertices, size_t count) { void UploadVB(const void* vertices, size_t count)
while (true) { {
try { while (true)
{
try
{
m_vb->upload(vertices, count); m_vb->upload(vertices, count);
break; break;
} catch (GSDXErrorGlVertexArrayTooSmall) { }
catch (GSDXErrorGlVertexArrayTooSmall)
{
GL_INS("GL vertex buffer is too small"); GL_INS("GL vertex buffer is too small");
m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count)); m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count));
@ -319,12 +348,17 @@ public:
} }
} }
void UploadIB(const void* index, size_t count) { void UploadIB(const void* index, size_t count)
while (true) { {
try { while (true)
{
try
{
m_ib->upload(index, count); m_ib->upload(index, count);
break; break;
} catch (GSDXErrorGlVertexArrayTooSmall) { }
catch (GSDXErrorGlVertexArrayTooSmall)
{
GL_INS("GL index buffer is too small"); GL_INS("GL index buffer is too small");
m_ib.reset(new GSBufferOGL<sizeof(uint32)>(GL_ELEMENT_ARRAY_BUFFER, count)); m_ib.reset(new GSBufferOGL<sizeof(uint32)>(GL_ELEMENT_ARRAY_BUFFER, count));
@ -336,5 +370,4 @@ public:
{ {
glDeleteVertexArrays(1, &m_va); glDeleteVertexArrays(1, &m_va);
} }
}; };

View File

@ -272,7 +272,8 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co
c = c.upl16(c.zwxy()); c = c.upl16(c.zwxy());
if(sel.tfx == TFX_NONE) c = c.srl16(7); if (sel.tfx == TFX_NONE)
c = c.srl16(7);
m_local.c.rb = c.xxxx(); m_local.c.rb = c.xxxx();
m_local.c.ga = c.zzzz(); m_local.c.ga = c.zzzz();
@ -418,7 +419,8 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co
c = c.upl16(c.zwxy()); c = c.upl16(c.zwxy());
if(sel.tfx == TFX_NONE) c = c.srl16(7); if (sel.tfx == TFX_NONE)
c = c.srl16(7);
m_local.c.rb = c.xxxx(); m_local.c.rb = c.xxxx();
m_local.c.ga = c.zzzz(); m_local.c.ga = c.zzzz();
@ -596,7 +598,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
case ZTST_GREATER: test |= zso <= zdo; break; case ZTST_GREATER: test |= zso <= zdo; break;
} }
if(test.alltrue()) continue; if (test.alltrue())
continue;
} }
} }
@ -904,7 +907,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
ga2 = c00.srl16(8); ga2 = c00.srl16(8);
} }
if(sel.lcm) lodf = m_global.lod.f; if (sel.lcm)
lodf = m_global.lod.f;
lodf = lodf.srl16(1); lodf = lodf.srl16(1);
@ -1042,16 +1046,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{ {
case TFX_MODULATE: case TFX_MODULATE:
ga = ga.modulate16<1>(gaf).clamp8(); ga = ga.modulate16<1>(gaf).clamp8();
if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
break; break;
case TFX_DECAL: case TFX_DECAL:
if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
break; break;
case TFX_HIGHLIGHT: case TFX_HIGHLIGHT:
ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7)));
break; break;
case TFX_HIGHLIGHT2: case TFX_HIGHLIGHT2:
if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
break; break;
case TFX_NONE: case TFX_NONE:
ga = sel.iip ? gaf.srl16(7) : gaf; ga = sel.iip ? gaf.srl16(7) : gaf;
@ -1089,7 +1096,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
// TestAlpha // TestAlpha
if(!TestAlpha(test, fm, zm, ga)) continue; if (!TestAlpha(test, fm, zm, ga))
continue;
// ColorTFX // ColorTFX
@ -1180,7 +1188,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
} }
} }
if(test.alltrue()) continue; if (test.alltrue())
continue;
} }
// WriteMask // WriteMask
@ -1479,12 +1488,13 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
} }
} }
} }
} } while (0);
while(0);
if(sel.edge) break; if (sel.edge)
break;
if(steps <= 0) break; if (steps <= 0)
break;
// Step // Step
@ -1718,7 +1728,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
case ZTST_GREATER: test |= zso <= zdo; break; case ZTST_GREATER: test |= zso <= zdo; break;
} }
if(test.alltrue()) continue; if (test.alltrue())
continue;
} }
} }
@ -2039,7 +2050,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
ga2 = c00.srl16(8); ga2 = c00.srl16(8);
} }
if(sel.lcm) lodf = m_global.lod.f; if (sel.lcm)
lodf = m_global.lod.f;
lodf = lodf.srl16(1); lodf = lodf.srl16(1);
@ -2174,16 +2186,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{ {
case TFX_MODULATE: case TFX_MODULATE:
ga = ga.modulate16<1>(gaf).clamp8(); ga = ga.modulate16<1>(gaf).clamp8();
if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
break; break;
case TFX_DECAL: case TFX_DECAL:
if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
break; break;
case TFX_HIGHLIGHT: case TFX_HIGHLIGHT:
ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7)));
break; break;
case TFX_HIGHLIGHT2: case TFX_HIGHLIGHT2:
if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
break; break;
case TFX_NONE: case TFX_NONE:
ga = sel.iip ? gaf.srl16(7) : gaf; ga = sel.iip ? gaf.srl16(7) : gaf;
@ -2221,7 +2236,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
// TestAlpha // TestAlpha
if(!TestAlpha(test, fm, zm, ga)) continue; if (!TestAlpha(test, fm, zm, ga))
continue;
// ColorTFX // ColorTFX
@ -2307,7 +2323,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
} }
} }
if(test.alltrue()) continue; if (test.alltrue())
continue;
} }
// WriteMask // WriteMask
@ -2589,12 +2606,13 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
} }
} }
} }
} } while (0);
while(0);
if(sel.edge) break; if (sel.edge)
break;
if(steps <= 0) break; if (steps <= 0)
break;
// Step // Step
@ -2683,15 +2701,18 @@ bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga)
switch (sel.afail) switch (sel.afail)
{ {
case AFAIL_FB_ONLY: case AFAIL_FB_ONLY:
if(!sel.zwrite) return true; if (!sel.zwrite)
return true;
break; break;
case AFAIL_ZB_ONLY: case AFAIL_ZB_ONLY:
if(!sel.fwrite) return true; if (!sel.fwrite)
return true;
break; break;
case AFAIL_RGB_ONLY: case AFAIL_RGB_ONLY:
if(!sel.zwrite && sel.fpsm == 1) return true; if (!sel.zwrite && sel.fpsm == 1)
return true;
break; break;
} }
@ -2732,7 +2753,8 @@ bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga)
{ {
case AFAIL_KEEP: case AFAIL_KEEP:
test |= t; test |= t;
if(test.alltrue()) return false; if (test.alltrue())
return false;
break; break;
case AFAIL_FB_ONLY: case AFAIL_FB_ONLY:
@ -2757,7 +2779,8 @@ bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga)
static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0] static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0]
template<class T> void GSDrawScanline::WritePixel(const T& src, int addr, int i, uint32 psm) template <class T>
void GSDrawScanline::WritePixel(const T& src, int addr, int i, uint32 psm)
{ {
uint8* dst = (uint8*)m_global.vm + addr * 2 + s_offsets[i] * 2; uint8* dst = (uint8*)m_global.vm + addr * 2 + s_offsets[i] * 2;
@ -2871,7 +2894,8 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
{ {
if(m == 0xffffffff) return; if (m == 0xffffffff)
return;
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
@ -2896,7 +2920,8 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
color = color.andnot(mask); color = color.andnot(mask);
c = c & (~m); c = c & (~m);
if(masked) ASSERT(mask.u32[0] != 0); if (masked)
ASSERT(mask.u32[0] != 0);
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8)); GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
@ -2922,7 +2947,8 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
{ {
if(r.x >= r.z) return; if (r.x >= r.z)
return;
T* vm = (T*)m_global.vm; T* vm = (T*)m_global.vm;
@ -2942,7 +2968,8 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m) void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
{ {
if(r.x >= r.z) return; if (r.x >= r.z)
return;
T* vm = (T*)m_global.vm; T* vm = (T*)m_global.vm;
@ -2971,7 +2998,8 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
template <class T, bool masked> template <class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m) void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
{ {
if(r.x >= r.z) return; if (r.x >= r.z)
return;
T* vm = (T*)m_global.vm; T* vm = (T*)m_global.vm;

View File

@ -86,5 +86,8 @@ public:
#endif #endif
void PrintStats() {m_ds_map.PrintStats();} void PrintStats()
{
m_ds_map.PrintStats();
}
}; };

View File

@ -43,9 +43,12 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key
if (m_sel.breakpoint) if (m_sel.breakpoint)
db(0xCC); db(0xCC);
try { try
{
Generate(); Generate();
} catch (std::exception& e) { }
catch (std::exception& e)
{
fprintf(stderr, "ERR:GSDrawScanlineCodeGenerator %s\n", e.what()); fprintf(stderr, "ERR:GSDrawScanlineCodeGenerator %s\n", e.what());
} }
} }
@ -63,7 +66,6 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uin
vpsllw(a, shift + 1); vpsllw(a, shift + 1);
vpmulhw(a, f); vpmulhw(a, f);
} }
} }
else else
{ {
@ -133,7 +135,8 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
// Greg: why ? // Greg: why ?
if(m_cpu.has(util::Cpu::tAVX2)) { if (m_cpu.has(util::Cpu::tAVX2))
{
ASSERT(a.isYMM()); ASSERT(a.isYMM());
vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
} }
@ -227,13 +230,18 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
if (m_cpu.has(util::Cpu::tAVX)) if (m_cpu.has(util::Cpu::tAVX))
{ {
if (src == h) { if (src == h)
{
vpsllw(l, src, 8); vpsllw(l, src, 8);
vpsrlw(h, 8); vpsrlw(h, 8);
} else if (src == l) { }
else if (src == l)
{
vpsrlw(h, src, 8); vpsrlw(h, src, 8);
vpsllw(l, 8); vpsllw(l, 8);
} else { }
else
{
vpsllw(l, src, 8); vpsllw(l, src, 8);
vpsrlw(h, src, 8); vpsrlw(h, src, 8);
} }
@ -241,11 +249,16 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
} }
else else
{ {
if (src == h) { if (src == h)
{
movdqa(l, src); movdqa(l, src);
} else if (src == l) { }
else if (src == l)
{
movdqa(h, src); movdqa(h, src);
} else { }
else
{
movdqa(l, src); movdqa(l, src);
movdqa(h, src); movdqa(h, src);
} }

View File

@ -1668,18 +1668,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(_rb, _dst_rb); break; break;
case 2: vpxor(_rb, _rb); break; case 1:
vmovdqa(_rb, _dst_rb);
break;
case 2:
vpxor(_rb, _rb);
break;
} }
// rb = rb.sub16(c[abb * 2 + 0]); // rb = rb.sub16(c[abb * 2 + 0]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(_rb, xmm5); break; case 0:
case 1: vpsubw(_rb, _dst_rb); break; vpsubw(_rb, xmm5);
case 2: break; break;
case 1:
vpsubw(_rb, _dst_rb);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -1708,9 +1718,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(_rb, xmm5); break; case 0:
case 1: vpaddw(_rb, _dst_rb); break; vpaddw(_rb, xmm5);
case 2: break; break;
case 1:
vpaddw(_rb, _dst_rb);
break;
case 2:
break;
} }
} }
else else
@ -1719,9 +1734,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(_rb, _dst_rb); break; break;
case 2: vpxor(_rb, _rb); break; case 1:
vmovdqa(_rb, _dst_rb);
break;
case 2:
vpxor(_rb, _rb);
break;
} }
} }
@ -1752,18 +1772,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(_ga, _dst_ga); break; break;
case 2: vpxor(_ga, _ga); break; case 1:
vmovdqa(_ga, _dst_ga);
break;
case 2:
vpxor(_ga, _ga);
break;
} }
// ga = ga.sub16(c[abeb * 2 + 1]); // ga = ga.sub16(c[abeb * 2 + 1]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(_ga, xmm5); break; case 0:
case 1: vpsubw(_ga, _dst_ga); break; vpsubw(_ga, xmm5);
case 2: break; break;
case 1:
vpsubw(_ga, _dst_ga);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -1777,9 +1807,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(_ga, xmm5); break; case 0:
case 1: vpaddw(_ga, _dst_ga); break; vpaddw(_ga, xmm5);
case 2: break; break;
case 1:
vpaddw(_ga, _dst_ga);
break;
case 2:
break;
} }
} }
else else
@ -1788,9 +1823,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(_ga, _dst_ga); break; break;
case 2: vpxor(_ga, _ga); break; case 1:
vmovdqa(_ga, _dst_ga);
break;
case 2:
vpxor(_ga, _ga);
break;
} }
} }
@ -1843,7 +1883,6 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX()
vpaddw(xmm2, ptr[rax + sizeof(GSVector4i) * 0]); vpaddw(xmm2, ptr[rax + sizeof(GSVector4i) * 0]);
vpaddw(xmm3, ptr[rax + sizeof(GSVector4i) * 1]); vpaddw(xmm3, ptr[rax + sizeof(GSVector4i) * 1]);
} }
if (m_sel.colclamp == 0) if (m_sel.colclamp == 0)
@ -1997,12 +2036,16 @@ void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg64& ad
switch (psm) switch (psm)
{ {
case 0: case 0:
if(i == 0) vmovd(dst, src); if (i == 0)
else vpextrd(dst, src, i); vmovd(dst, src);
else
vpextrd(dst, src, i);
break; break;
case 1: case 1:
if(i == 0) vmovd(eax, src); if (i == 0)
else vpextrd(eax, src, i); vmovd(eax, src);
else
vpextrd(eax, src, i);
xor(eax, dst); xor(eax, dst);
and(eax, 0xffffff); and(eax, 0xffffff);
xor(dst, eax); xor(dst, eax);
@ -2033,14 +2076,19 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(const Xmm& dst, const Xmm& addr,
const Address& src = m_sel.tlu ? ptr[_m_local__gd__clut + rax * 4] : ptr[_m_local__gd__tex + rax * 4]; const Address& src = m_sel.tlu ? ptr[_m_local__gd__clut + rax * 4] : ptr[_m_local__gd__tex + rax * 4];
// Extract address offset // Extract address offset
if(i == 0) vmovd(eax, addr); if (i == 0)
else vpextrd(eax, addr, i); vmovd(eax, addr);
else
vpextrd(eax, addr, i);
// If clut, load the value as a byte index // If clut, load the value as a byte index
if(m_sel.tlu) movzx(eax, byte[_m_local__gd__tex + rax]); if (m_sel.tlu)
movzx(eax, byte[_m_local__gd__tex + rax]);
if(i == 0) vmovd(dst, src); if (i == 0)
else vpinsrd(dst, src, i); vmovd(dst, src);
else
vpinsrd(dst, src, i);
} }
// Gather example (AVX2). Not faster on Haswell but potentially better on recent CPU // Gather example (AVX2). Not faster on Haswell but potentially better on recent CPU

View File

@ -2559,18 +2559,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm5, ymm0); break; break;
case 2: vpxor(ymm5, ymm5); break; case 1:
vmovdqa(ymm5, ymm0);
break;
case 2:
vpxor(ymm5, ymm5);
break;
} }
// rb = rb.sub16(c[abb * 2 + 0]); // rb = rb.sub16(c[abb * 2 + 0]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(ymm5, ymm4); break; case 0:
case 1: vpsubw(ymm5, ymm0); break; vpsubw(ymm5, ymm4);
case 2: break; break;
case 1:
vpsubw(ymm5, ymm0);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2599,9 +2609,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(ymm5, ymm4); break; case 0:
case 1: vpaddw(ymm5, ymm0); break; vpaddw(ymm5, ymm4);
case 2: break; break;
case 1:
vpaddw(ymm5, ymm0);
break;
case 2:
break;
} }
} }
else else
@ -2610,9 +2625,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm5, ymm0); break; break;
case 2: vpxor(ymm5, ymm5); break; case 1:
vmovdqa(ymm5, ymm0);
break;
case 2:
vpxor(ymm5, ymm5);
break;
} }
} }
@ -2643,18 +2663,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm6, ymm1); break; break;
case 2: vpxor(ymm6, ymm6); break; case 1:
vmovdqa(ymm6, ymm1);
break;
case 2:
vpxor(ymm6, ymm6);
break;
} }
// ga = ga.sub16(c[abeb * 2 + 1]); // ga = ga.sub16(c[abeb * 2 + 1]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(ymm6, ymm4); break; case 0:
case 1: vpsubw(ymm6, ymm1); break; vpsubw(ymm6, ymm4);
case 2: break; break;
case 1:
vpsubw(ymm6, ymm1);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2668,9 +2698,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(ymm6, ymm4); break; case 0:
case 1: vpaddw(ymm6, ymm1); break; vpaddw(ymm6, ymm4);
case 2: break; break;
case 1:
vpaddw(ymm6, ymm1);
break;
case 2:
break;
} }
} }
else else
@ -2679,9 +2714,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm6, ymm1); break; break;
case 2: vpxor(ymm6, ymm6); break; case 1:
vmovdqa(ymm6, ymm1);
break;
case 2:
vpxor(ymm6, ymm6);
break;
} }
} }
@ -2918,19 +2958,25 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const RegLong& addr
switch (psm) switch (psm)
{ {
case 0: case 0:
if(j == 0) vmovd(dst, src); if (j == 0)
else vpextrd(dst, src, j); vmovd(dst, src);
else
vpextrd(dst, src, j);
break; break;
case 1: case 1:
if(j == 0) vmovd(eax, src); if (j == 0)
else vpextrd(eax, src, j); vmovd(eax, src);
else
vpextrd(eax, src, j);
xor(eax, dst); xor(eax, dst);
and(eax, 0xffffff); and(eax, 0xffffff);
xor(dst, eax); xor(dst, eax);
break; break;
case 2: case 2:
if(j == 0) vmovd(eax, src); if (j == 0)
else vpextrw(eax, src, j * 2); vmovd(eax, src);
else
vpextrw(eax, src, j * 2);
mov(dst, ax); mov(dst, ax);
break; break;
} }
@ -3042,13 +3088,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Ymm& dst, const Ymm& addr, uin
const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4];
if(i == 0) vmovd(eax, Xmm(addr.getIdx())); if (i == 0)
else vpextrd(eax, Xmm(addr.getIdx()), i); vmovd(eax, Xmm(addr.getIdx()));
else
vpextrd(eax, Xmm(addr.getIdx()), i);
if(m_sel.tlu) movzx(eax, byte[ebx + eax]); if (m_sel.tlu)
movzx(eax, byte[ebx + eax]);
if(i == 0) vmovd(Xmm(dst.getIdx()), src); if (i == 0)
else vpinsrd(Xmm(dst.getIdx()), src, i); vmovd(Xmm(dst.getIdx()), src);
else
vpinsrd(Xmm(dst.getIdx()), src, i);
} }

View File

@ -2472,18 +2472,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(xmm5, xmm0); break; break;
case 2: vpxor(xmm5, xmm5); break; case 1:
vmovdqa(xmm5, xmm0);
break;
case 2:
vpxor(xmm5, xmm5);
break;
} }
// rb = rb.sub16(c[abb * 2 + 0]); // rb = rb.sub16(c[abb * 2 + 0]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(xmm5, xmm4); break; case 0:
case 1: vpsubw(xmm5, xmm0); break; vpsubw(xmm5, xmm4);
case 2: break; break;
case 1:
vpsubw(xmm5, xmm0);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2512,9 +2522,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(xmm5, xmm4); break; case 0:
case 1: vpaddw(xmm5, xmm0); break; vpaddw(xmm5, xmm4);
case 2: break; break;
case 1:
vpaddw(xmm5, xmm0);
break;
case 2:
break;
} }
} }
else else
@ -2523,9 +2538,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(xmm5, xmm0); break; break;
case 2: vpxor(xmm5, xmm5); break; case 1:
vmovdqa(xmm5, xmm0);
break;
case 2:
vpxor(xmm5, xmm5);
break;
} }
} }
@ -2556,18 +2576,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(xmm6, xmm1); break; break;
case 2: vpxor(xmm6, xmm6); break; case 1:
vmovdqa(xmm6, xmm1);
break;
case 2:
vpxor(xmm6, xmm6);
break;
} }
// ga = ga.sub16(c[abeb * 2 + 1]); // ga = ga.sub16(c[abeb * 2 + 1]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(xmm6, xmm4); break; case 0:
case 1: vpsubw(xmm6, xmm1); break; vpsubw(xmm6, xmm4);
case 2: break; break;
case 1:
vpsubw(xmm6, xmm1);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2581,9 +2611,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(xmm6, xmm4); break; case 0:
case 1: vpaddw(xmm6, xmm1); break; vpaddw(xmm6, xmm4);
case 2: break; break;
case 1:
vpaddw(xmm6, xmm1);
break;
case 2:
break;
} }
} }
else else
@ -2592,9 +2627,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(xmm6, xmm1); break; break;
case 2: vpxor(xmm6, xmm6); break; case 1:
vmovdqa(xmm6, xmm1);
break;
case 2:
vpxor(xmm6, xmm6);
break;
} }
} }
@ -2785,19 +2825,25 @@ void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg32& ad
switch (psm) switch (psm)
{ {
case 0: case 0:
if(i == 0) vmovd(dst, src); if (i == 0)
else vpextrd(dst, src, i); vmovd(dst, src);
else
vpextrd(dst, src, i);
break; break;
case 1: case 1:
if(i == 0) vmovd(eax, src); if (i == 0)
else vpextrd(eax, src, i); vmovd(eax, src);
else
vpextrd(eax, src, i);
xor(eax, dst); xor(eax, dst);
and(eax, 0xffffff); and(eax, 0xffffff);
xor(dst, eax); xor(dst, eax);
break; break;
case 2: case 2:
if(i == 0) vmovd(eax, src); if (i == 0)
else vpextrw(eax, src, i * 2); vmovd(eax, src);
else
vpextrw(eax, src, i * 2);
mov(dst, ax); mov(dst, ax);
break; break;
} }
@ -2878,13 +2924,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(const Xmm& dst, const Xmm& addr,
const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4];
if(i == 0) vmovd(eax, addr); if (i == 0)
else vpextrd(eax, addr, i); vmovd(eax, addr);
else
vpextrd(eax, addr, i);
if(m_sel.tlu) movzx(eax, byte[ebx + eax]); if (m_sel.tlu)
movzx(eax, byte[ebx + eax]);
if(i == 0) vmovd(dst, src); if (i == 0)
else vpinsrd(dst, src, i); vmovd(dst, src);
else
vpinsrd(dst, src, i);
} }
#endif #endif

View File

@ -2477,18 +2477,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm5, ymm0); break; break;
case 2: vpxor(ymm5, ymm5); break; case 1:
vmovdqa(ymm5, ymm0);
break;
case 2:
vpxor(ymm5, ymm5);
break;
} }
// rb = rb.sub16(c[abb * 2 + 0]); // rb = rb.sub16(c[abb * 2 + 0]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(ymm5, ymm4); break; case 0:
case 1: vpsubw(ymm5, ymm0); break; vpsubw(ymm5, ymm4);
case 2: break; break;
case 1:
vpsubw(ymm5, ymm0);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2517,9 +2527,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(ymm5, ymm4); break; case 0:
case 1: vpaddw(ymm5, ymm0); break; vpaddw(ymm5, ymm4);
case 2: break; break;
case 1:
vpaddw(ymm5, ymm0);
break;
case 2:
break;
} }
} }
else else
@ -2528,9 +2543,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm5, ymm0); break; break;
case 2: vpxor(ymm5, ymm5); break; case 1:
vmovdqa(ymm5, ymm0);
break;
case 2:
vpxor(ymm5, ymm5);
break;
} }
} }
@ -2561,18 +2581,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm6, ymm1); break; break;
case 2: vpxor(ymm6, ymm6); break; case 1:
vmovdqa(ymm6, ymm1);
break;
case 2:
vpxor(ymm6, ymm6);
break;
} }
// ga = ga.sub16(c[abb * 2 + 1]); // ga = ga.sub16(c[abb * 2 + 1]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: vpsubw(ymm6, ymm4); break; case 0:
case 1: vpsubw(ymm6, ymm1); break; vpsubw(ymm6, ymm4);
case 2: break; break;
case 1:
vpsubw(ymm6, ymm1);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2586,9 +2616,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: vpaddw(ymm6, ymm4); break; case 0:
case 1: vpaddw(ymm6, ymm1); break; vpaddw(ymm6, ymm4);
case 2: break; break;
case 1:
vpaddw(ymm6, ymm1);
break;
case 2:
break;
} }
} }
else else
@ -2597,9 +2632,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: vmovdqa(ymm6, ymm1); break; break;
case 2: vpxor(ymm6, ymm6); break; case 1:
vmovdqa(ymm6, ymm1);
break;
case 2:
vpxor(ymm6, ymm6);
break;
} }
} }
@ -2836,19 +2876,25 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
switch (psm) switch (psm)
{ {
case 0: case 0:
if(j == 0) vmovd(dst, src); if (j == 0)
else vpextrd(dst, src, j); vmovd(dst, src);
else
vpextrd(dst, src, j);
break; break;
case 1: case 1:
if(j == 0) vmovd(eax, src); if (j == 0)
else vpextrd(eax, src, j); vmovd(eax, src);
else
vpextrd(eax, src, j);
xor(eax, dst); xor(eax, dst);
and(eax, 0xffffff); and(eax, 0xffffff);
xor(dst, eax); xor(dst, eax);
break; break;
case 2: case 2:
if(j == 0) vmovd(eax, src); if (j == 0)
else vpextrw(eax, src, j * 2); vmovd(eax, src);
else
vpextrw(eax, src, j * 2);
mov(dst, ax); mov(dst, ax);
break; break;
} }
@ -2960,13 +3006,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Ymm& dst, const Ymm& addr, uin
const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4];
if(i == 0) vmovd(eax, Xmm(addr.getIdx())); if (i == 0)
else vpextrd(eax, Xmm(addr.getIdx()), i); vmovd(eax, Xmm(addr.getIdx()));
else
vpextrd(eax, Xmm(addr.getIdx()), i);
if(m_sel.tlu) movzx(eax, byte[ebx + eax]); if (m_sel.tlu)
movzx(eax, byte[ebx + eax]);
if(i == 0) vmovd(Xmm(dst.getIdx()), src); if (i == 0)
else vpinsrd(Xmm(dst.getIdx()), src, i); vmovd(Xmm(dst.getIdx()), src);
else
vpinsrd(Xmm(dst.getIdx()), src, i);
} }

View File

@ -2481,18 +2481,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: movdqa(xmm5, xmm0); break; break;
case 2: pxor(xmm5, xmm5); break; case 1:
movdqa(xmm5, xmm0);
break;
case 2:
pxor(xmm5, xmm5);
break;
} }
// rb = rb.sub16(c[abb * 2 + 0]); // rb = rb.sub16(c[abb * 2 + 0]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: psubw(xmm5, xmm4); break; case 0:
case 1: psubw(xmm5, xmm0); break; psubw(xmm5, xmm4);
case 2: break; break;
case 1:
psubw(xmm5, xmm0);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2521,9 +2531,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: paddw(xmm5, xmm4); break; case 0:
case 1: paddw(xmm5, xmm0); break; paddw(xmm5, xmm4);
case 2: break; break;
case 1:
paddw(xmm5, xmm0);
break;
case 2:
break;
} }
} }
else else
@ -2532,9 +2547,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: movdqa(xmm5, xmm0); break; break;
case 2: pxor(xmm5, xmm5); break; case 1:
movdqa(xmm5, xmm0);
break;
case 2:
pxor(xmm5, xmm5);
break;
} }
} }
@ -2566,18 +2586,28 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
switch (m_sel.aba) switch (m_sel.aba)
{ {
case 0: break; case 0:
case 1: movdqa(xmm6, xmm1); break; break;
case 2: pxor(xmm6, xmm6); break; case 1:
movdqa(xmm6, xmm1);
break;
case 2:
pxor(xmm6, xmm6);
break;
} }
// ga = ga.sub16(c[abb * 2 + 1]); // ga = ga.sub16(c[abb * 2 + 1]);
switch (m_sel.abb) switch (m_sel.abb)
{ {
case 0: psubw(xmm6, xmm4); break; case 0:
case 1: psubw(xmm6, xmm1); break; psubw(xmm6, xmm4);
case 2: break; break;
case 1:
psubw(xmm6, xmm1);
break;
case 2:
break;
} }
if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) if (!(m_sel.fpsm == 1 && m_sel.abc == 1))
@ -2591,9 +2621,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: paddw(xmm6, xmm4); break; case 0:
case 1: paddw(xmm6, xmm1); break; paddw(xmm6, xmm4);
case 2: break; break;
case 1:
paddw(xmm6, xmm1);
break;
case 2:
break;
} }
} }
else else
@ -2602,9 +2637,14 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
switch (m_sel.abd) switch (m_sel.abd)
{ {
case 0: break; case 0:
case 1: movdqa(xmm6, xmm1); break; break;
case 2: pxor(xmm6, xmm6); break; case 1:
movdqa(xmm6, xmm1);
break;
case 2:
pxor(xmm6, xmm6);
break;
} }
} }
@ -2798,14 +2838,18 @@ void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& ad
switch (psm) switch (psm)
{ {
case 0: case 0:
if(i == 0) movd(dst, src); if (i == 0)
else { movd(dst, src);
else
{
pextrd(dst, src, i); pextrd(dst, src, i);
} }
break; break;
case 1: case 1:
if(i == 0) movd(eax, src); if (i == 0)
else { movd(eax, src);
else
{
pextrd(eax, src, i); pextrd(eax, src, i);
} }
xor(eax, dst); xor(eax, dst);
@ -2813,8 +2857,10 @@ void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& ad
xor(dst, eax); xor(dst, eax);
break; break;
case 2: case 2:
if(i == 0) movd(eax, src); if (i == 0)
else pextrw(eax, src, i * 2); movd(eax, src);
else
pextrw(eax, src, i * 2);
mov(dst, ax); mov(dst, ax);
break; break;
} }
@ -2895,13 +2941,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel_SSE(const Xmm& dst, const Xmm& addr,
ASSERT(i == 0 || m_cpu.has(util::Cpu::tSSE41)); ASSERT(i == 0 || m_cpu.has(util::Cpu::tSSE41));
if(i == 0) movd(eax, addr); if (i == 0)
else pextrd(eax, addr, i); movd(eax, addr);
else
pextrd(eax, addr, i);
if(m_sel.tlu) movzx(eax, byte[ebx + eax]); if (m_sel.tlu)
movzx(eax, byte[ebx + eax]);
if(i == 0) movd(dst, src); if (i == 0)
else pinsrd(dst, src, i); movd(dst, src);
else
pinsrd(dst, src, i);
} }
#endif #endif

View File

@ -26,7 +26,8 @@
int GSRasterizerData::s_counter = 0; int GSRasterizerData::s_counter = 0;
static int compute_best_thread_height(int threads) { static int compute_best_thread_height(int threads)
{
// - for more threads screen segments should be smaller to better distribute the pixels // - for more threads screen segments should be smaller to better distribute the pixels
// - but not too small to keep the threading overhead low // - but not too small to keep the threading overhead low
// - ideal value between 3 and 5, or log2(64 / number of threads) // - ideal value between 3 and 5, or log2(64 / number of threads)
@ -70,7 +71,8 @@ GSRasterizer::~GSRasterizer()
{ {
_aligned_free(m_scanline); _aligned_free(m_scanline);
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048); if (m_edge.buff != NULL)
vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
delete m_ds; delete m_ds;
} }
@ -106,7 +108,8 @@ int GSRasterizer::FindMyNextScanline(int top) const
if (m_scanline[i] == 0) if (m_scanline[i] == 0)
{ {
while(m_scanline[++i] == 0); while (m_scanline[++i] == 0)
;
top = i << m_thread_height; top = i << m_thread_height;
} }
@ -135,7 +138,8 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{ {
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id); GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return; if (data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0)
return;
m_pixels.actual = 0; m_pixels.actual = 0;
m_pixels.total = 0; m_pixels.total = 0;
@ -177,13 +181,19 @@ void GSRasterizer::Draw(GSRasterizerData* data)
if (index != NULL) if (index != NULL)
{ {
do {DrawLine(vertex, index); index += 2;} do
while(index < index_end); {
DrawLine(vertex, index);
index += 2;
} while (index < index_end);
} }
else else
{ {
do {DrawLine(vertex, tmp_index); vertex += 2;} do
while(vertex < vertex_end); {
DrawLine(vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
} }
break; break;
@ -192,13 +202,19 @@ void GSRasterizer::Draw(GSRasterizerData* data)
if (index != NULL) if (index != NULL)
{ {
do {DrawTriangle(vertex, index); index += 3;} do
while(index < index_end); {
DrawTriangle(vertex, index);
index += 3;
} while (index < index_end);
} }
else else
{ {
do {DrawTriangle(vertex, tmp_index); vertex += 3;} do
while(vertex < vertex_end); {
DrawTriangle(vertex, tmp_index);
vertex += 3;
} while (vertex < vertex_end);
} }
break; break;
@ -207,13 +223,19 @@ void GSRasterizer::Draw(GSRasterizerData* data)
if (index != NULL) if (index != NULL)
{ {
do {DrawSprite(vertex, index); index += 2;} do
while(index < index_end); {
DrawSprite(vertex, index);
index += 2;
} while (index < index_end);
} }
else else
{ {
do {DrawSprite(vertex, tmp_index); vertex += 2;} do
while(vertex < vertex_end); {
DrawSprite(vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
} }
break; break;
@ -370,7 +392,8 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
} }
} }
if(--steps == 0) break; if (--steps == 0)
break;
edge += dedge; edge += dedge;
} }
@ -428,7 +451,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
// if(i == 1) => y0 == y1 < y2 // if(i == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2 // if(i == 4) => y0 < y1 == y2
if(m1 == 7) return; // y0 == y1 == y2 if (m1 == 7) // y0 == y1 == y2
return;
GSVector4 tbf = y0011.xzxz(y1221).ceil(); GSVector4 tbf = y0011.xzxz(y1221).ceil();
GSVector4 tbmax = tbf.max(m_fscissor_y); GSVector4 tbmax = tbf.max(m_fscissor_y);
@ -447,7 +471,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
int m2 = cross.upl(cross == GSVector4::zero()).mask(); int m2 = cross.upl(cross == GSVector4::zero()).mask();
if(m2 & 2) return; if (m2 & 2)
return;
m2 &= 1; m2 &= 1;
@ -615,7 +640,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
// if(i == 1) => y0 == y1 < y2 // if(i == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2 // if(i == 4) => y0 < y1 == y2
if(m1 == 7) return; // y0 == y1 == y2 if (m1 == 7)
return; // y0 == y1 == y2
GSVector4 tbf = y0011.xzxz(y1221).ceil(); GSVector4 tbf = y0011.xzxz(y1221).ceil();
GSVector4 tbmax = tbf.max(m_fscissor_y); GSVector4 tbmax = tbf.max(m_fscissor_y);
@ -634,7 +660,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
int m2 = cross.upl(cross == GSVector4::zero()).mask(); int m2 = cross.upl(cross == GSVector4::zero()).mask();
if(m2 & 2) return; if (m2 & 2)
return;
m2 &= 1; m2 &= 1;
@ -795,7 +822,8 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
r = r.rintersect(m_scissor); r = r.rintersect(m_scissor);
if(r.rempty()) return; if (r.rempty())
return;
GSVertexSW scan = v[0]; GSVertexSW scan = v[0];
@ -860,7 +888,8 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
DrawScanline(r.width(), r.left, r.top, scan); DrawScanline(r.width(), r.left, r.top, scan);
} }
if(++r.top >= r.bottom) break; if (++r.top >= r.bottom)
break;
scan.t += dedge.t; scan.t += dedge.t;
} }
@ -897,7 +926,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
top = tb.extract32<0>(); // max(t, st) top = tb.extract32<0>(); // max(t, st)
bottom = tb.extract32<3>(); // min(b, sb) bottom = tb.extract32<3>(); // min(b, sb)
if(top >= bottom) return; if (top >= bottom)
return;
edge = v0; edge = v0;
dedge = dv / dv.p.yyyy(); dedge = dv / dv.p.yyyy();
@ -909,7 +939,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
top = tb.extract32<1>(); // max(b, st) top = tb.extract32<1>(); // max(b, st)
bottom = tb.extract32<2>(); // min(t, sb) bottom = tb.extract32<2>(); // min(t, sb)
if(top >= bottom) return; if (top >= bottom)
return;
edge = v1; edge = v1;
dedge = dv / dv.p.yyyy(); dedge = dv / dv.p.yyyy();
@ -938,7 +969,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++; e++;
} }
if(++top >= bottom) break; if (++top >= bottom)
break;
edge += dedge; edge += dedge;
x += dx; x += dx;
@ -960,7 +992,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++; e++;
} }
if(++top >= bottom) break; if (++top >= bottom)
break;
edge += dedge; edge += dedge;
x += dx; x += dx;
@ -983,7 +1016,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
left = lr.extract32<0>(); // max(l, sl) left = lr.extract32<0>(); // max(l, sl)
right = lr.extract32<3>(); // min(r, sr) right = lr.extract32<3>(); // min(r, sr)
if(left >= right) return; if (left >= right)
return;
edge = v0; edge = v0;
dedge = dv / dv.p.xxxx(); dedge = dv / dv.p.xxxx();
@ -995,7 +1029,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
left = lr.extract32<1>(); // max(r, sl) left = lr.extract32<1>(); // max(r, sl)
right = lr.extract32<2>(); // min(l, sr) right = lr.extract32<2>(); // min(l, sr)
if(left >= right) return; if (left >= right)
return;
edge = v1; edge = v1;
dedge = dv / dv.p.xxxx(); dedge = dv / dv.p.xxxx();
@ -1024,7 +1059,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++; e++;
} }
if(++left >= right) break; if (++left >= right)
break;
edge += dedge; edge += dedge;
y += dy; y += dy;
@ -1046,7 +1082,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
e++; e++;
} }
if(++left >= right) break; if (++left >= right)
break;
edge += dedge; edge += dedge;
y += dy; y += dy;
@ -1088,8 +1125,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
int top = e->_pad.i32[2]; int top = e->_pad.i32[2];
DrawScanline(pixels, left, top, *e++); DrawScanline(pixels, left, top, *e++);
} } while (e < ee);
while(e < ee);
} }
else else
{ {
@ -1100,8 +1136,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
int top = e->_pad.i32[2]; int top = e->_pad.i32[2];
DrawEdge(pixels, left, top, *e++); DrawEdge(pixels, left, top, *e++);
} } while (e < ee);
while(e < ee);
} }
m_edge.count = 0; m_edge.count = 0;

View File

@ -64,7 +64,8 @@ public:
virtual ~GSRasterizerData() virtual ~GSRasterizerData()
{ {
if(buff != NULL) _aligned_free(buff); if (buff != NULL)
_aligned_free(buff);
} }
}; };
@ -82,7 +83,13 @@ protected:
DrawRectPtr m_dr; DrawRectPtr m_dr;
public: public:
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {} IDrawScanline()
: m_sp(NULL)
, m_ds(NULL)
, m_de(NULL)
, m_dr(NULL)
{
}
virtual ~IDrawScanline() {} virtual ~IDrawScanline() {}
virtual void BeginDraw(const GSRasterizerData* data) = 0; virtual void BeginDraw(const GSRasterizerData* data) = 0;
@ -195,7 +202,8 @@ protected:
public: public:
virtual ~GSRasterizerList(); virtual ~GSRasterizerList();
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon) template <class DS>
static IRasterizer* Create(int threads, GSPerfMon* perfmon)
{ {
threads = std::max<int>(threads, 0); threads = std::max<int>(threads, 0);

View File

@ -44,10 +44,12 @@ GSRendererSW::GSRendererSW(int threads)
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
for (uint32 i = 0; i < countof(m_fzb_pages); i++) { for (uint32 i = 0; i < countof(m_fzb_pages); i++)
{
m_fzb_pages[i] = 0; m_fzb_pages[i] = 0;
} }
for (uint32 i = 0; i < countof(m_tex_pages); i++) { for (uint32 i = 0; i < countof(m_tex_pages); i++)
{
m_tex_pages[i] = 0; m_tex_pages[i] = 0;
} }
@ -70,7 +72,8 @@ GSRendererSW::GSRendererSW(int threads)
// Reset handler with the auto flush hack enabled on the SW renderer. // Reset handler with the auto flush hack enabled on the SW renderer.
// Some games run better without the hack so rely on ini/gui option. // Some games run better without the hack so rely on ini/gui option.
if (!GLLoader::in_replayer && theApp.GetConfigB("autoflush_sw")) { if (!GLLoader::in_replayer && theApp.GetConfigB("autoflush_sw"))
{
m_userhacks_auto_flush = true; m_userhacks_auto_flush = true;
ResetHandlers(); ResetHandlers();
} }
@ -193,7 +196,8 @@ GSTexture* GSRendererSW::GetFeedbackOutput()
int dummy; int dummy;
// It is enough to emulate Xenosaga cutscene. (or any game that will do a basic loopback) // It is enough to emulate Xenosaga cutscene. (or any game that will do a basic loopback)
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++)
{
if (m_regs->EXTBUF.EXBP == m_regs->DISP[i].DISPFB.Block()) if (m_regs->EXTBUF.EXBP == m_regs->DISP[i].DISPFB.Block())
return GetOutput(i, dummy); return GetOutput(i, dummy);
} }
@ -402,8 +406,7 @@ void GSRendererSW::Draw()
(float)(v->XYZ.X - context->XYOFFSET.OFX) / 16, (float)(v->XYZ.X - context->XYOFFSET.OFX) / 16,
(float)(v->XYZ.Y - context->XYOFFSET.OFY) / 16, (float)(v->XYZ.Y - context->XYOFFSET.OFY) / 16,
PRIM->FST ? (float)(v->U) / 16 : v->ST.S / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q), PRIM->FST ? (float)(v->U) / 16 : v->ST.S / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q),
PRIM->FST ? (float)(v->V) / 16 : v->ST.T / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q) PRIM->FST ? (float)(v->V) / 16 : v->ST.T / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q));
);
} }
} }
} }
@ -467,7 +470,8 @@ void GSRendererSW::Draw()
if (s_savet && s_n >= s_saven && PRIM->TME) if (s_savet && s_n >= s_saven && PRIM->TME)
{ {
if (texture_shuffle) { if (texture_shuffle)
{
// Dump the RT in 32 bits format. It helps to debug texture shuffle effect // Dump the RT in 32 bits format. It helps to debug texture shuffle effect
s = format("%05d_f%lld_itexraw_%05x_32bits.bmp", s_n, frame, (int)m_context->TEX0.TBP0); s = format("%05d_f%lld_itexraw_%05x_32bits.bmp", s_n, frame, (int)m_context->TEX0.TBP0);
m_mem.SaveBMP(m_dump_root + s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); m_mem.SaveBMP(m_dump_root + s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);
@ -480,7 +484,8 @@ void GSRendererSW::Draw()
if (s_save && s_n >= s_saven) if (s_save && s_n >= s_saven)
{ {
if (texture_shuffle) { if (texture_shuffle)
{
// Dump the RT in 32 bits format. It helps to debug texture shuffle effect // Dump the RT in 32 bits format. It helps to debug texture shuffle effect
s = format("%05d_f%lld_rt0_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); s = format("%05d_f%lld_rt0_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block());
m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512);
@ -503,7 +508,8 @@ void GSRendererSW::Draw()
if (s_save && s_n >= s_saven) if (s_save && s_n >= s_saven)
{ {
if (texture_shuffle) { if (texture_shuffle)
{
// Dump the RT in 32 bits format. It helps to debug texture shuffle effect // Dump the RT in 32 bits format. It helps to debug texture shuffle effect
s = format("%05d_f%lld_rt1_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); s = format("%05d_f%lld_rt1_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block());
m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512);
@ -624,14 +630,22 @@ void GSRendererSW::Sync(int reason)
int pixels = m_rl->GetPixels(); int pixels = m_rl->GetPixels();
if(LOG) {fprintf(s_fp, "sync n=%d r=%d t=%llu p=%d %c\n", s_n, reason, t, pixels, t > 10000000 ? '*' : ' '); fflush(s_fp);} if (LOG)
{
fprintf(s_fp, "sync n=%d r=%d t=%llu p=%d %c\n", s_n, reason, t, pixels, t > 10000000 ? '*' : ' ');
fflush(s_fp);
}
m_perfmon.Put(GSPerfMon::Fillrate, pixels); m_perfmon.Put(GSPerfMon::Fillrate, pixels);
} }
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{ {
if(LOG) {fprintf(s_fp, "w %05x %u %u, %d %d %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} if (LOG)
{
fprintf(s_fp, "w %05x %u %u, %d %d %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM, r.x, r.y, r.z, r.w);
fflush(s_fp);
}
GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
@ -657,7 +671,11 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{ {
if(LOG) {fprintf(s_fp, "%s %05x %u %u, %d %d %d %d\n", clut ? "rp" : "r", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} if (LOG)
{
fprintf(s_fp, "%s %05x %u %u, %d %d %d %d\n", clut ? "rp" : "r", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w);
fflush(s_fp);
}
if (!m_rl->IsSynced()) if (!m_rl->IsSynced())
{ {
@ -679,8 +697,10 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
void GSRendererSW::UsePages(const uint32* pages, const int type) void GSRendererSW::UsePages(const uint32* pages, const int type)
{ {
for(const uint32* p = pages; *p != GSOffset::EOP; p++) { for (const uint32* p = pages; *p != GSOffset::EOP; p++)
switch (type) { {
switch (type)
{
case 0: case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX); ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX);
m_fzb_pages[*p] += 1; m_fzb_pages[*p] += 1;
@ -693,15 +713,18 @@ void GSRendererSW::UsePages(const uint32* pages, const int type)
ASSERT(m_tex_pages[*p] < USHRT_MAX); ASSERT(m_tex_pages[*p] < USHRT_MAX);
m_tex_pages[*p] += 1; m_tex_pages[*p] += 1;
break; break;
default:break; default:
break;
} }
} }
} }
void GSRendererSW::ReleasePages(const uint32* pages, const int type) void GSRendererSW::ReleasePages(const uint32* pages, const int type)
{ {
for(const uint32* p = pages; *p != GSOffset::EOP; p++) { for (const uint32* p = pages; *p != GSOffset::EOP; p++)
switch (type) { {
switch (type)
{
case 0: case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0); ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0);
m_fzb_pages[*p] -= 1; m_fzb_pages[*p] -= 1;
@ -714,7 +737,8 @@ void GSRendererSW::ReleasePages(const uint32* pages, const int type)
ASSERT(m_tex_pages[*p] > 0); ASSERT(m_tex_pages[*p] > 0);
m_tex_pages[*p] -= 1; m_tex_pages[*p] -= 1;
break; break;
default:break; default:
break;
} }
} }
} }
@ -772,7 +796,11 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{ {
if (used) if (used)
{ {
if(LOG) {fprintf(s_fp, "syncpoint 0\n"); fflush(s_fp);} if (LOG)
{
fprintf(s_fp, "syncpoint 0\n");
fflush(s_fp);
}
res = true; res = true;
} }
@ -833,7 +861,11 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{ {
if (used) if (used)
{ {
if(LOG) {fprintf(s_fp, "syncpoint 1\n"); fflush(s_fp);} if (LOG)
{
fprintf(s_fp, "syncpoint 1\n");
fflush(s_fp);
}
res = true; res = true;
} }
@ -851,7 +883,11 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{ {
if (m_fzb_pages[*p] & 0xffff0000) if (m_fzb_pages[*p] & 0xffff0000)
{ {
if(LOG) {fprintf(s_fp, "syncpoint 2\n"); fflush(s_fp);} if (LOG)
{
fprintf(s_fp, "syncpoint 2\n");
fflush(s_fp);
}
res = true; res = true;
@ -866,7 +902,11 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{ {
if (m_fzb_pages[*p] & 0x0000ffff) if (m_fzb_pages[*p] & 0x0000ffff)
{ {
if(LOG) {fprintf(s_fp, "syncpoint 3\n"); fflush(s_fp);} if (LOG)
{
fprintf(s_fp, "syncpoint 3\n");
fflush(s_fp);
}
res = true; res = true;
@ -987,7 +1027,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
zwrite || ztest ? m_context->ZBUF.Block() : 0xfffff, m_context->ZBUF.PSM, zwrite || ztest ? m_context->ZBUF.Block() : 0xfffff, m_context->ZBUF.PSM,
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH); PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH);
*/ */
if(!fwrite && !zwrite) return false; if (!fwrite && !zwrite)
return false;
gd.sel.fwrite = fwrite; gd.sel.fwrite = fwrite;
gd.sel.ftest = ftest; gd.sel.ftest = ftest;
@ -1037,7 +1078,11 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA);
if(t == NULL) {ASSERT(0); return false;} if (t == NULL)
{
ASSERT(0);
return false;
}
data->SetSource(t, r, 0); data->SetSource(t, r, 0);
@ -1131,7 +1176,11 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3); GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return false;} if (t == NULL)
{
ASSERT(0);
return false;
}
GSVector4i r; GSVector4i r;
@ -1399,23 +1448,28 @@ GSRendererSW::SharedData::~SharedData()
{ {
ReleasePages(); ReleasePages();
if(global.clut) _aligned_free(global.clut); if (global.clut)
if(global.dimx) _aligned_free(global.dimx); _aligned_free(global.clut);
if (global.dimx)
_aligned_free(global.dimx);
if(LOG) {fprintf(s_fp, "[%d] done t=%lld p=%d | %d %d %d | %08x_%08x\n", if (LOG)
{
fprintf(s_fp, "[%d] done t=%lld p=%d | %d %d %d | %08x_%08x\n",
counter, counter,
__rdtsc() - start, pixels, __rdtsc() - start, pixels,
primclass, vertex_count, index_count, primclass, vertex_count, index_count,
global.sel.hi, global.sel.lo global.sel.hi, global.sel.lo);
); fflush(s_fp);
fflush(s_fp);} }
} }
//static TransactionScope::Lock s_lock; //static TransactionScope::Lock s_lock;
void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm) void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm)
{ {
if(m_using_pages) return; if (m_using_pages)
return;
{ {
//TransactionScope scope(s_lock); //TransactionScope scope(s_lock);
@ -1446,7 +1500,8 @@ void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const
void GSRendererSW::SharedData::ReleasePages() void GSRendererSW::SharedData::ReleasePages()
{ {
if(!m_using_pages) return; if (!m_using_pages)
return;
{ {
//TransactionScope scope(s_lock); //TransactionScope scope(s_lock);

View File

@ -47,7 +47,12 @@ class GSRendererSW : public GSRenderer
int m_zpsm; int m_zpsm;
bool m_using_pages; bool m_using_pages;
TextureLevel m_tex[7 + 1]; // NULL terminated TextureLevel m_tex[7 + 1]; // NULL terminated
enum {SyncNone, SyncSource, SyncTarget} m_syncpoint; enum
{
SyncNone,
SyncSource,
SyncTarget
} m_syncpoint;
public: public:
SharedData(GSRendererSW* parent); SharedData(GSRendererSW* parent);

View File

@ -94,21 +94,17 @@ union GSScanlineSelector
uint64 key; uint64 key;
GSScanlineSelector() = default; GSScanlineSelector() = default;
GSScanlineSelector(uint64 k) : key(k) {} GSScanlineSelector(uint64 k)
: key(k)
{
}
operator uint32() const { return lo; } operator uint32() const { return lo; }
operator uint64() const { return key; } operator uint64() const { return key; }
bool IsSolidRect() const bool IsSolidRect() const
{ {
return prim == GS_SPRITE_CLASS return prim == GS_SPRITE_CLASS && iip == 0 && tfx == TFX_NONE && abe == 0 && ztst <= 1 && atst <= 1 && date == 0 && fge == 0;
&& iip == 0
&& tfx == TFX_NONE
&& abe == 0
&& ztst <= 1
&& atst <= 1
&& date == 0
&& fge == 0;
} }
void Print() const void Print() const
@ -319,14 +315,15 @@ struct GSScanlineConstantData : public GSAlignedClass<32>
1.0f 1.0f
}; };
for (size_t n = 0; n < countof(log2_coef); ++n) { for (size_t n = 0; n < countof(log2_coef); ++n)
for (size_t i = 0; i < 4; ++i) { {
for (size_t i = 0; i < 4; ++i)
{
m_log2_coef_128b[n][i] = log2_coef[n]; m_log2_coef_128b[n][i] = log2_coef[n];
m_log2_coef_256b[n][i] = log2_coef[n]; m_log2_coef_256b[n][i] = log2_coef[n];
m_log2_coef_256b[n][i + 4] = log2_coef[n]; m_log2_coef_256b[n][i + 4] = log2_coef[n];
} }
} }
} }
}; };

View File

@ -36,7 +36,8 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0; m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0; m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0;
try { try
{
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
Generate_AVX2(); Generate_AVX2();
#else #else
@ -45,7 +46,9 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void
else else
Generate_SSE(); Generate_SSE();
#endif #endif
} catch (std::exception& e) { }
catch (std::exception& e)
{
fprintf(stderr, "ERR:GSSetupPrimCodeGenerator %s\n", e.what()); fprintf(stderr, "ERR:GSSetupPrimCodeGenerator %s\n", e.what());
} }
} }

View File

@ -33,7 +33,10 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
GSScanlineLocalData& m_local; GSScanlineLocalData& m_local;
bool m_rip; bool m_rip;
struct {uint32 z:1, f:1, t:1, c:1;} m_en; struct
{
uint32 z : 1, f : 1, t : 1, c : 1;
} m_en;
#if _M_SSE < 0x501 #if _M_SSE < 0x501
void Generate_SSE(); void Generate_SSE();

View File

@ -103,8 +103,10 @@ void GSSetupPrimCodeGenerator::Depth_AVX2()
{ {
// m_local.d[i].z = dz * shift[1 + i]; // m_local.d[i].z = dz * shift[1 + i];
if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); if (i < 4)
else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vmulps(ymm0, ymm2, Ymm(4 + i));
else
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
vmovaps(ptr[&m_local.d[i].z], ymm0); vmovaps(ptr[&m_local.d[i].z], ymm0);
} }
@ -112,8 +114,10 @@ void GSSetupPrimCodeGenerator::Depth_AVX2()
{ {
// m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh();
if(i < 4) vmulps(ymm0, ymm1, Ymm(4 + i)); if (i < 4)
else vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]); vmulps(ymm0, ymm1, Ymm(4 + i));
else
vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
vcvttps2dq(ymm0, ymm0); vcvttps2dq(ymm0, ymm0);
vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
@ -189,8 +193,10 @@ void GSSetupPrimCodeGenerator::Texture_AVX2()
{ {
// GSVector8 v = dstq * shift[1 + i]; // GSVector8 v = dstq * shift[1 + i];
if(i < 4) vmulps(ymm2, ymm1, Ymm(4 + i)); if (i < 4)
else vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]); vmulps(ymm2, ymm1, Ymm(4 + i));
else
vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
if (m_sel.fst) if (m_sel.fst)
{ {
@ -252,15 +258,19 @@ void GSSetupPrimCodeGenerator::Color_AVX2()
{ {
// GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32();
if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); if (i < 4)
else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vmulps(ymm0, ymm2, Ymm(4 + i));
else
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
vcvttps2dq(ymm0, ymm0); vcvttps2dq(ymm0, ymm0);
vpackssdw(ymm0, ymm0); vpackssdw(ymm0, ymm0);
// GSVector4i b = GSVector8i(db * shift[1 + i]).ps32(); // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32();
if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); if (i < 4)
else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); vmulps(ymm1, ymm3, Ymm(4 + i));
else
vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
vcvttps2dq(ymm1, ymm1); vcvttps2dq(ymm1, ymm1);
vpackssdw(ymm1, ymm1); vpackssdw(ymm1, ymm1);
@ -284,15 +294,19 @@ void GSSetupPrimCodeGenerator::Color_AVX2()
{ {
// GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32();
if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); if (i < 4)
else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vmulps(ymm0, ymm2, Ymm(4 + i));
else
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
vcvttps2dq(ymm0, ymm0); vcvttps2dq(ymm0, ymm0);
vpackssdw(ymm0, ymm0); vpackssdw(ymm0, ymm0);
// GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32();
if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); if (i < 4)
else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); vmulps(ymm1, ymm3, Ymm(4 + i));
else
vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
vcvttps2dq(ymm1, ymm1); vcvttps2dq(ymm1, ymm1);
vpackssdw(ymm1, ymm1); vpackssdw(ymm1, ymm1);

View File

@ -109,7 +109,8 @@ void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
void GSTextureCacheSW::RemoveAll() void GSTextureCacheSW::RemoveAll()
{ {
for(auto i : m_textures) delete i; for (auto i : m_textures)
delete i;
m_textures.clear(); m_textures.clear();

View File

@ -263,4 +263,3 @@ struct alignas(32) GSVertexSW2
}; };
#endif #endif

View File

@ -24,14 +24,14 @@
#include "GSCaptureDlg.h" #include "GSCaptureDlg.h"
#define BeginEnumSysDev(clsid, pMoniker) \ #define BeginEnumSysDev(clsid, pMoniker) \
{CComPtr<ICreateDevEnum> pDevEnum4$##clsid; \ { \
CComPtr<ICreateDevEnum> pDevEnum4$##clsid; \
pDevEnum4$##clsid.CoCreateInstance(CLSID_SystemDeviceEnum); \ pDevEnum4$##clsid.CoCreateInstance(CLSID_SystemDeviceEnum); \
CComPtr<IEnumMoniker> pClassEnum4$##clsid; \ CComPtr<IEnumMoniker> pClassEnum4$##clsid; \
if(SUCCEEDED(pDevEnum4$##clsid->CreateClassEnumerator(clsid, &pClassEnum4$##clsid, 0)) \ if (SUCCEEDED(pDevEnum4$##clsid->CreateClassEnumerator(clsid, &pClassEnum4$##clsid, 0)) && pClassEnum4$##clsid) \
&& pClassEnum4$##clsid) \
{ \ { \
for (CComPtr<IMoniker> pMoniker; pClassEnum4$##clsid->Next(1, &pMoniker, 0) == S_OK; pMoniker = NULL) \ for (CComPtr<IMoniker> pMoniker; pClassEnum4$##clsid->Next(1, &pMoniker, 0) == S_OK; pMoniker = NULL) \
{ \ {
#define EndEnumSysDev }}} #define EndEnumSysDev }}}
@ -55,7 +55,8 @@ int GSCaptureDlg::GetSelCodec(Codec& c)
if (ComboBoxGetSelData(IDC_CODECS, data)) if (ComboBoxGetSelData(IDC_CODECS, data))
{ {
if(data == 0) return 2; if (data == 0)
return 2;
c = *(Codec*)data; c = *(Codec*)data;
@ -63,7 +64,8 @@ int GSCaptureDlg::GetSelCodec(Codec& c)
{ {
c.moniker->BindToObject(NULL, NULL, __uuidof(IBaseFilter), (void**)&c.filter); c.moniker->BindToObject(NULL, NULL, __uuidof(IBaseFilter), (void**)&c.filter);
if(!c.filter) return 0; if (!c.filter)
return 0;
} }
return 1; return 1;

View File

@ -91,11 +91,13 @@ INT_PTR CALLBACK GSDialog::DialogProc(HWND hWnd, UINT message, WPARAM wParam, LP
UINT GSDialog::GetTooltipStructSize() UINT GSDialog::GetTooltipStructSize()
{ {
DLLGETVERSIONPROC dllGetVersion = (DLLGETVERSIONPROC)GetProcAddress(GetModuleHandle(L"ComCtl32.dll"), "DllGetVersion"); DLLGETVERSIONPROC dllGetVersion = (DLLGETVERSIONPROC)GetProcAddress(GetModuleHandle(L"ComCtl32.dll"), "DllGetVersion");
if (dllGetVersion) { if (dllGetVersion)
{
DLLVERSIONINFO2 dllversion = {0}; DLLVERSIONINFO2 dllversion = {0};
dllversion.info1.cbSize = sizeof(DLLVERSIONINFO2); dllversion.info1.cbSize = sizeof(DLLVERSIONINFO2);
if (dllGetVersion((DLLVERSIONINFO*)&dllversion) == S_OK) { if (dllGetVersion((DLLVERSIONINFO*)&dllversion) == S_OK)
{
// Minor, then major version. // Minor, then major version.
DWORD version = MAKELONG(dllversion.info1.dwMinorVersion, dllversion.info1.dwMajorVersion); DWORD version = MAKELONG(dllversion.info1.dwMinorVersion, dllversion.info1.dwMajorVersion);
DWORD tooltip_v3 = MAKELONG(0, 6); DWORD tooltip_v3 = MAKELONG(0, 6);
@ -269,7 +271,8 @@ void GSDialog::ComboBoxFixDroppedWidth(UINT id)
{ {
size.cx += 10; size.cx += 10;
if(size.cx > width) width = size.cx; if (size.cx > width)
width = size.cx;
} }
delete[] buff; delete[] buff;
@ -307,7 +310,6 @@ void GSDialog::OpenFileDialog(UINT id, const wchar_t *title)
SendMessage(GetDlgItem(m_hWnd, id), WM_SETTEXT, 0, (LPARAM)filename); SendMessage(GetDlgItem(m_hWnd, id), WM_SETTEXT, 0, (LPARAM)filename);
SetCurrentDirectory(current_directory); SetCurrentDirectory(current_directory);
} }
void GSDialog::AddTooltip(UINT id) void GSDialog::AddTooltip(UINT id)

View File

@ -30,10 +30,10 @@ struct GSSetting
std::string note; std::string note;
template <typename T> template <typename T>
explicit GSSetting(T value, const char* name, const char* note) : explicit GSSetting(T value, const char* name, const char* note)
value(static_cast<int32_t>(value)), : value(static_cast<int32_t>(value))
name(name), , name(name)
note(note) , note(note)
{ {
} }
}; };
@ -45,7 +45,8 @@ const char* dialog_message(int ID, bool* updateText = NULL);
#endif #endif
#ifndef _WIN32 #ifndef _WIN32
enum { enum
{
IDC_FILTER, IDC_FILTER,
IDC_HALF_SCREEN_TS, IDC_HALF_SCREEN_TS,
IDC_TRI_FILTER, IDC_TRI_FILTER,

View File

@ -377,9 +377,10 @@ void GSSettingsDlg::UpdateControls()
// Shader Configuration Dialog // Shader Configuration Dialog
GSShaderDlg::GSShaderDlg() : GSShaderDlg::GSShaderDlg()
GSDialog(IDD_SHADER) : GSDialog(IDD_SHADER)
{} {
}
void GSShaderDlg::OnInit() void GSShaderDlg::OnInit()
{ {
@ -486,7 +487,8 @@ bool GSShaderDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
wprintf(text, "%d", m_contrast); wprintf(text, "%d", m_contrast);
SetDlgItemText(m_hWnd, IDC_CONTRAST_VALUE, text); SetDlgItemText(m_hWnd, IDC_CONTRAST_VALUE, text);
} }
} break; }
break;
case WM_COMMAND: case WM_COMMAND:
{ {
@ -531,7 +533,8 @@ bool GSShaderDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
theApp.SetConfig("shaderfx_conf", output.c_str()); theApp.SetConfig("shaderfx_conf", output.c_str());
EndDialog(m_hWnd, id); EndDialog(m_hWnd, id);
} break; }
break;
case IDC_SHADEBOOST: case IDC_SHADEBOOST:
UpdateControls(); UpdateControls();
case IDC_SHADER_FX: case IDC_SHADER_FX:
@ -551,14 +554,18 @@ bool GSShaderDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
case IDCANCEL: case IDCANCEL:
{ {
EndDialog(m_hWnd, IDCANCEL); EndDialog(m_hWnd, IDCANCEL);
} break;
} }
break;
}
}
break;
} break; case WM_CLOSE:
EndDialog(m_hWnd, IDCANCEL);
break;
case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; default:
return false;
default: return false;
} }
@ -798,19 +805,24 @@ bool GSHacksDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
theApp.SetConfig("UserHacks_TCOffsetY", (int)SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETY), UDM_GETPOS, 0, 0)); theApp.SetConfig("UserHacks_TCOffsetY", (int)SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETY), UDM_GETPOS, 0, 0));
EndDialog(m_hWnd, id); EndDialog(m_hWnd, id);
} break; }
break;
case IDCANCEL: case IDCANCEL:
{ {
EndDialog(m_hWnd, IDCANCEL); EndDialog(m_hWnd, IDCANCEL);
} break;
} }
break;
}
}
break;
} break; case WM_CLOSE:
EndDialog(m_hWnd, IDCANCEL);
break;
case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; default:
return false;
default: return false;
} }
return true; return true;
@ -818,9 +830,10 @@ bool GSHacksDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
// OSD Configuration Dialog // OSD Configuration Dialog
GSOSDDlg::GSOSDDlg() : GSOSDDlg::GSOSDDlg()
GSDialog(IDD_OSD) : GSDialog(IDD_OSD)
{} {
}
void GSOSDDlg::OnInit() void GSOSDDlg::OnInit()
{ {
@ -951,7 +964,8 @@ bool GSOSDDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
wprintf(text, "%d", m_color.b); wprintf(text, "%d", m_color.b);
SetDlgItemText(m_hWnd, IDC_OSD_COLOR_BLUE_AMOUNT, text); SetDlgItemText(m_hWnd, IDC_OSD_COLOR_BLUE_AMOUNT, text);
} }
} break; }
break;
case WM_COMMAND: case WM_COMMAND:
{ {
@ -974,7 +988,8 @@ bool GSOSDDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
theApp.SetConfig("osd_monitor_enabled", (int)IsDlgButtonChecked(m_hWnd, IDC_OSD_MONITOR)); theApp.SetConfig("osd_monitor_enabled", (int)IsDlgButtonChecked(m_hWnd, IDC_OSD_MONITOR));
EndDialog(m_hWnd, id); EndDialog(m_hWnd, id);
} break; }
break;
case IDC_OSD_LOG: case IDC_OSD_LOG:
if (HIWORD(wParam) == BN_CLICKED) if (HIWORD(wParam) == BN_CLICKED)
UpdateControls(); UpdateControls();
@ -992,14 +1007,18 @@ bool GSOSDDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
case IDCANCEL: case IDCANCEL:
{ {
EndDialog(m_hWnd, IDCANCEL); EndDialog(m_hWnd, IDCANCEL);
} break;
} }
break;
}
}
break;
} break; case WM_CLOSE:
EndDialog(m_hWnd, IDCANCEL);
break;
case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; default:
return false;
default: return false;
} }

View File

@ -83,7 +83,12 @@ class GSSettingsDlg : public GSDialog
std::string name; std::string name;
std::string id; std::string id;
D3D_FEATURE_LEVEL level; D3D_FEATURE_LEVEL level;
Adapter(const std::string &n, const std::string &i, const D3D_FEATURE_LEVEL &l) : name(n), id(i), level(l) {} Adapter(const std::string& n, const std::string& i, const D3D_FEATURE_LEVEL& l)
: name(n)
, id(i)
, level(l)
{
}
}; };
std::vector<GSSetting> m_renderers; std::vector<GSSetting> m_renderers;

View File

@ -31,8 +31,11 @@ protected:
bool m_managed; // set true when we're attached to a 3rdparty window that's amanged by the emulator bool m_managed; // set true when we're attached to a 3rdparty window that's amanged by the emulator
public: public:
GSWnd() : m_managed(false) {}; GSWnd()
virtual ~GSWnd() {}; : m_managed(false)
{
}
virtual ~GSWnd() {}
virtual bool Create(const std::string& title, int w, int h) = 0; virtual bool Create(const std::string& title, int w, int h) = 0;
virtual bool Attach(void* handle, bool managed = true) = 0; virtual bool Attach(void* handle, bool managed = true) = 0;
@ -44,16 +47,15 @@ public:
virtual GSVector4i GetClientRect() = 0; virtual GSVector4i GetClientRect() = 0;
virtual bool SetWindowText(const char* title) = 0; virtual bool SetWindowText(const char* title) = 0;
virtual void AttachContext() {}; virtual void AttachContext() {}
virtual void DetachContext() {}; virtual void DetachContext() {}
virtual void Show() = 0; virtual void Show() = 0;
virtual void Hide() = 0; virtual void Hide() = 0;
virtual void HideFrame() = 0; virtual void HideFrame() = 0;
virtual void Flip() {}; virtual void Flip() {}
virtual void SetVSync(int vsync) {}; virtual void SetVSync(int vsync) {}
}; };
class GSWndGL : public GSWnd class GSWndGL : public GSWnd
@ -73,8 +75,13 @@ protected:
virtual bool HasLateVsyncSupport() = 0; virtual bool HasLateVsyncSupport() = 0;
public: public:
GSWndGL() : m_ctx_attached(false), m_vsync_change_requested(false), m_vsync(0) {}; GSWndGL()
virtual ~GSWndGL() {}; : m_ctx_attached(false)
, m_vsync_change_requested(false)
, m_vsync(0)
{
}
virtual ~GSWndGL() {}
virtual bool Create(const std::string& title, int w, int h) = 0; virtual bool Create(const std::string& title, int w, int h) = 0;
virtual bool Attach(void* handle, bool managed = true) = 0; virtual bool Attach(void* handle, bool managed = true) = 0;

View File

@ -179,7 +179,8 @@ GSVector4i GSWndDX::GetClientRect()
bool GSWndDX::SetWindowText(const char* title) bool GSWndDX::SetWindowText(const char* title)
{ {
if(!m_managed) return false; if (!m_managed)
return false;
const size_t tmp_size = strlen(title) + 1; const size_t tmp_size = strlen(title) + 1;
std::wstring tmp(tmp_size, L'#'); std::wstring tmp(tmp_size, L'#');
@ -191,7 +192,8 @@ bool GSWndDX::SetWindowText(const char* title)
void GSWndDX::Show() void GSWndDX::Show()
{ {
if(!m_managed) return; if (!m_managed)
return;
SetForegroundWindow(m_hWnd); SetForegroundWindow(m_hWnd);
ShowWindow(m_hWnd, SW_SHOWNORMAL); ShowWindow(m_hWnd, SW_SHOWNORMAL);
@ -200,14 +202,16 @@ void GSWndDX::Show()
void GSWndDX::Hide() void GSWndDX::Hide()
{ {
if(!m_managed) return; if (!m_managed)
return;
ShowWindow(m_hWnd, SW_HIDE); ShowWindow(m_hWnd, SW_HIDE);
} }
void GSWndDX::HideFrame() void GSWndDX::HideFrame()
{ {
if(!m_managed) return; if (!m_managed)
return;
SetWindowLong(m_hWnd, GWL_STYLE, GetWindowLong(m_hWnd, GWL_STYLE) & ~(WS_CAPTION | WS_THICKFRAME)); SetWindowLong(m_hWnd, GWL_STYLE, GetWindowLong(m_hWnd, GWL_STYLE) & ~(WS_CAPTION | WS_THICKFRAME));
SetWindowPos(m_hWnd, NULL, 0, 0, 0, 0, SWP_NOSIZE | SWP_NOMOVE | SWP_NOZORDER | SWP_NOACTIVATE); SetWindowPos(m_hWnd, NULL, 0, 0, 0, 0, SWP_NOSIZE | SWP_NOMOVE | SWP_NOZORDER | SWP_NOACTIVATE);

View File

@ -29,27 +29,31 @@ int GSWndEGL::SelectPlatform()
{ {
// Check the supported extension // Check the supported extension
const char* client_extensions = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); const char* client_extensions = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS);
if (!client_extensions) { if (!client_extensions)
{
fprintf(stderr, "EGL: Client extension not supported\n"); fprintf(stderr, "EGL: Client extension not supported\n");
return 0; return 0;
} }
fprintf(stdout, "EGL: Supported extensions: %s\n", client_extensions); fprintf(stdout, "EGL: Supported extensions: %s\n", client_extensions);
// Check platform extensions are supported (Note: there are core in 1.5) // Check platform extensions are supported (Note: there are core in 1.5)
if (!strstr(client_extensions, "EGL_EXT_platform_base")) { if (!strstr(client_extensions, "EGL_EXT_platform_base"))
{
fprintf(stderr, "EGL: Dynamic platform selection isn't supported\n"); fprintf(stderr, "EGL: Dynamic platform selection isn't supported\n");
return 0; return 0;
} }
// Finally we can select the platform // Finally we can select the platform
#if GS_EGL_X11 #if GS_EGL_X11
if (strstr(client_extensions, "EGL_EXT_platform_x11")) { if (strstr(client_extensions, "EGL_EXT_platform_x11"))
{
fprintf(stdout, "EGL: select X11 platform\n"); fprintf(stdout, "EGL: select X11 platform\n");
return EGL_PLATFORM_X11_KHR; return EGL_PLATFORM_X11_KHR;
} }
#endif #endif
#if GS_EGL_WL #if GS_EGL_WL
if (strstr(client_extensions, "EGL_EXT_platform_wayland")) { if (strstr(client_extensions, "EGL_EXT_platform_wayland"))
{
fprintf(stdout, "EGL: select Wayland platform\n"); fprintf(stdout, "EGL: select Wayland platform\n");
return EGL_PLATFORM_WAYLAND_KHR; return EGL_PLATFORM_WAYLAND_KHR;
} }
@ -111,7 +115,8 @@ void GSWndEGL::CreateContext(int major, int minor)
m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, contextAttribs); m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, contextAttribs);
EGLint status = eglGetError(); EGLint status = eglGetError();
if (status == EGL_BAD_ATTRIBUTE || status == EGL_BAD_MATCH) { if (status == EGL_BAD_ATTRIBUTE || status == EGL_BAD_MATCH)
{
// Radeon/Gallium don't support advance attribute. Fallback to random value // Radeon/Gallium don't support advance attribute. Fallback to random value
// Note: Intel gives an EGL_BAD_MATCH. I don't know why but let's by stubborn and retry. // Note: Intel gives an EGL_BAD_MATCH. I don't know why but let's by stubborn and retry.
fprintf(stderr, "EGL: warning your driver doesn't support advance openGL context attributes\n"); fprintf(stderr, "EGL: warning your driver doesn't support advance openGL context attributes\n");
@ -133,7 +138,8 @@ void GSWndEGL::CreateContext(int major, int minor)
void GSWndEGL::AttachContext() void GSWndEGL::AttachContext()
{ {
if (!IsContextAttached()) { if (!IsContextAttached())
{
// The setting of the API is local to a thread. This function // The setting of the API is local to a thread. This function
// can be called from 2 threads. // can be called from 2 threads.
BindAPI(); BindAPI();
@ -146,7 +152,8 @@ void GSWndEGL::AttachContext()
void GSWndEGL::DetachContext() void GSWndEGL::DetachContext()
{ {
if (IsContextAttached()) { if (IsContextAttached())
{
//fprintf(stderr, "Detach the context\n"); //fprintf(stderr, "Detach the context\n");
eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
m_ctx_attached = false; m_ctx_attached = false;
@ -161,7 +168,8 @@ void GSWndEGL::BindAPI()
{ {
eglBindAPI(EGL_OPENGL_API); eglBindAPI(EGL_OPENGL_API);
EGLenum api = eglQueryAPI(); EGLenum api = eglQueryAPI();
if (api != EGL_OPENGL_API) { if (api != EGL_OPENGL_API)
{
fprintf(stderr, "EGL: Failed to bind the OpenGL API got 0x%x instead\n", api); fprintf(stderr, "EGL: Failed to bind the OpenGL API got 0x%x instead\n", api);
throw GSDXRecoverableError(); throw GSDXRecoverableError();
} }
@ -198,7 +206,8 @@ void GSWndEGL::Detach()
bool GSWndEGL::Create(const std::string& title, int w, int h) bool GSWndEGL::Create(const std::string& title, int w, int h)
{ {
if(w <= 0 || h <= 0) { if (w <= 0 || h <= 0)
{
w = theApp.GetConfigI("ModeWidth"); w = theApp.GetConfigI("ModeWidth");
h = theApp.GetConfigI("ModeHeight"); h = theApp.GetConfigI("ModeHeight");
} }
@ -217,7 +226,8 @@ bool GSWndEGL::Create(const std::string& title, int w, int h)
void* GSWndEGL::GetProcAddress(const char* name, bool opt) void* GSWndEGL::GetProcAddress(const char* name, bool opt)
{ {
void* ptr = (void*)eglGetProcAddress(name); void* ptr = (void*)eglGetProcAddress(name);
if (ptr == nullptr) { if (ptr == nullptr)
{
if (theApp.GetConfigB("debug_opengl")) if (theApp.GetConfigB("debug_opengl"))
fprintf(stderr, "Failed to find %s\n", name); fprintf(stderr, "Failed to find %s\n", name);
@ -267,12 +277,14 @@ void GSWndEGL::OpenEGLDisplay()
// Create an EGL display from the native display // Create an EGL display from the native display
m_eglDisplay = eglGetPlatformDisplay(m_platform, native_display, nullptr); m_eglDisplay = eglGetPlatformDisplay(m_platform, native_display, nullptr);
if (m_eglDisplay == EGL_NO_DISPLAY) { if (m_eglDisplay == EGL_NO_DISPLAY)
{
fprintf(stderr, "EGL: Failed to open a display! (0x%x)\n", eglGetError()); fprintf(stderr, "EGL: Failed to open a display! (0x%x)\n", eglGetError());
throw GSDXRecoverableError(); throw GSDXRecoverableError();
} }
if (!eglInitialize(m_eglDisplay, nullptr, nullptr)) { if (!eglInitialize(m_eglDisplay, nullptr, nullptr))
{
fprintf(stderr, "EGL: Failed to initialize the display! (0x%x)\n", eglGetError()); fprintf(stderr, "EGL: Failed to initialize the display! (0x%x)\n", eglGetError());
throw GSDXRecoverableError(); throw GSDXRecoverableError();
} }
@ -305,7 +317,8 @@ void *GSWndEGL_X11::CreateNativeWindow(int w, int h)
XMapWindow (m_NativeDisplay, m_NativeWindow); XMapWindow (m_NativeDisplay, m_NativeWindow);
#endif #endif
if (m_NativeDisplay == nullptr) { if (m_NativeDisplay == nullptr)
{
fprintf(stderr, "EGL X11: display wasn't created before the window\n"); fprintf(stderr, "EGL X11: display wasn't created before the window\n");
throw GSDXRecoverableError(); throw GSDXRecoverableError();
} }
@ -318,7 +331,8 @@ void *GSWndEGL_X11::CreateNativeWindow(int w, int h)
m_NativeWindow = xcb_generate_id(c); m_NativeWindow = xcb_generate_id(c);
if (m_NativeWindow == 0) { if (m_NativeWindow == 0)
{
fprintf(stderr, "EGL X11: failed to create the native window\n"); fprintf(stderr, "EGL X11: failed to create the native window\n");
throw GSDXRecoverableError(); throw GSDXRecoverableError();
} }
@ -341,7 +355,8 @@ void *GSWndEGL_X11::AttachNativeWindow(void *handle)
void GSWndEGL_X11::DestroyNativeResources() void GSWndEGL_X11::DestroyNativeResources()
{ {
if (m_NativeDisplay) { if (m_NativeDisplay)
{
XCloseDisplay(m_NativeDisplay); XCloseDisplay(m_NativeDisplay);
m_NativeDisplay = nullptr; m_NativeDisplay = nullptr;
} }
@ -349,7 +364,8 @@ void GSWndEGL_X11::DestroyNativeResources()
bool GSWndEGL_X11::SetWindowText(const char* title) bool GSWndEGL_X11::SetWindowText(const char* title)
{ {
if (!m_managed) return true; if (!m_managed)
return true;
xcb_connection_t* c = XGetXCBConnection(m_NativeDisplay); xcb_connection_t* c = XGetXCBConnection(m_NativeDisplay);
@ -393,7 +409,8 @@ void *GSWndEGL_WL::AttachNativeWindow(void *handle)
void GSWndEGL_WL::DestroyNativeResources() void GSWndEGL_WL::DestroyNativeResources()
{ {
if (m_NativeDisplay) { if (m_NativeDisplay)
{
wl_display_disconnect(m_NativeDisplay); wl_display_disconnect(m_NativeDisplay);
m_NativeDisplay = nullptr; m_NativeDisplay = nullptr;
} }

View File

@ -71,9 +71,9 @@ public:
void Flip() final; void Flip() final;
// Deprecated API // Deprecated API
void Show() final {}; void Show() final {}
void Hide() final {}; void Hide() final {}
void HideFrame() final {}; // DX9 API void HideFrame() final {} // DX9 API
virtual void* GetDisplay() = 0; // GSopen1 API virtual void* GetDisplay() = 0; // GSopen1 API
virtual void* GetHandle() = 0; // DX API virtual void* GetHandle() = 0; // DX API

View File

@ -30,10 +30,13 @@ static void win_error(const wchar_t* msg, bool fatal = true)
if (errorID) if (errorID)
fprintf(stderr, "WIN API ERROR:%ld\t", errorID); fprintf(stderr, "WIN API ERROR:%ld\t", errorID);
if (fatal) { if (fatal)
{
MessageBox(NULL, msg, L"ERROR", MB_OK | MB_ICONEXCLAMATION); MessageBox(NULL, msg, L"ERROR", MB_OK | MB_ICONEXCLAMATION);
throw GSDXRecoverableError(); throw GSDXRecoverableError();
} else { }
else
{
fprintf(stderr, "ERROR:%ls\n", msg); fprintf(stderr, "ERROR:%ls\n", msg);
} }
} }
@ -105,7 +108,8 @@ void GSWndWGL::CreateContext(int major, int minor)
win_error(L"Failed to init wglCreateContextAttribsARB function pointer"); win_error(L"Failed to init wglCreateContextAttribsARB function pointer");
HGLRC context30 = wglCreateContextAttribsARB(m_NativeDisplay, NULL, context_attribs); HGLRC context30 = wglCreateContextAttribsARB(m_NativeDisplay, NULL, context_attribs);
if (!context30) { if (!context30)
{
win_error(L"Failed to create a 3.x context with standard flags", false); win_error(L"Failed to create a 3.x context with standard flags", false);
// retry with more compatible option for (Mesa on Windows, OpenGL on WINE) // retry with more compatible option for (Mesa on Windows, OpenGL on WINE)
context_attribs[2 * 2 + 1] = 0; context_attribs[2 * 2 + 1] = 0;
@ -125,7 +129,8 @@ void GSWndWGL::CreateContext(int major, int minor)
void GSWndWGL::AttachContext() void GSWndWGL::AttachContext()
{ {
if (!IsContextAttached()) { if (!IsContextAttached())
{
wglMakeCurrent(m_NativeDisplay, m_context); wglMakeCurrent(m_NativeDisplay, m_context);
m_ctx_attached = true; m_ctx_attached = true;
} }
@ -133,7 +138,8 @@ void GSWndWGL::AttachContext()
void GSWndWGL::DetachContext() void GSWndWGL::DetachContext()
{ {
if (IsContextAttached()) { if (IsContextAttached())
{
wglMakeCurrent(NULL, NULL); wglMakeCurrent(NULL, NULL);
m_ctx_attached = false; m_ctx_attached = false;
} }
@ -145,10 +151,13 @@ void GSWndWGL::PopulateWndGlFunction()
// To ease the process, extension management is itself an extension. Clever isn't it! // To ease the process, extension management is itself an extension. Clever isn't it!
PFNWGLGETEXTENSIONSSTRINGARBPROC wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC)wglGetProcAddress("wglGetExtensionsStringARB"); PFNWGLGETEXTENSIONSSTRINGARBPROC wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC)wglGetProcAddress("wglGetExtensionsStringARB");
if (wglGetExtensionsStringARB) { if (wglGetExtensionsStringARB)
{
const char* ext = wglGetExtensionsStringARB(m_NativeDisplay); const char* ext = wglGetExtensionsStringARB(m_NativeDisplay);
m_has_late_vsync = m_swapinterval && ext && strstr(ext, "WGL_EXT_swap_control_tear"); m_has_late_vsync = m_swapinterval && ext && strstr(ext, "WGL_EXT_swap_control_tear");
} else { }
else
{
m_has_late_vsync = false; m_has_late_vsync = false;
} }
} }
@ -173,7 +182,8 @@ void GSWndWGL::Detach()
// The window still need to be closed // The window still need to be closed
DetachContext(); DetachContext();
if (m_context) wglDeleteContext(m_context); if (m_context)
wglDeleteContext(m_context);
m_context = NULL; m_context = NULL;
CloseWGLDisplay(); CloseWGLDisplay();
@ -184,14 +194,12 @@ void GSWndWGL::Detach()
DestroyWindow(m_NativeWindow); DestroyWindow(m_NativeWindow);
m_NativeWindow = NULL; m_NativeWindow = NULL;
} }
} }
void GSWndWGL::OpenWGLDisplay() void GSWndWGL::OpenWGLDisplay()
{ {
GLuint PixelFormat; // Holds The Results After Searching For A Match GLuint PixelFormat; // Holds The Results After Searching For A Match
PIXELFORMATDESCRIPTOR pfd = // pfd Tells Windows How We Want Things To Be PIXELFORMATDESCRIPTOR pfd = // pfd Tells Windows How We Want Things To Be
{ {
sizeof(PIXELFORMATDESCRIPTOR), // Size Of This Pixel Format Descriptor sizeof(PIXELFORMATDESCRIPTOR), // Size Of This Pixel Format Descriptor
1, // Version Number 1, // Version Number
@ -239,7 +247,8 @@ void GSWndWGL::CloseWGLDisplay()
// modifications // modifications
bool GSWndWGL::Create(const std::string& title, int w, int h) bool GSWndWGL::Create(const std::string& title, int w, int h)
{ {
if(m_NativeWindow) return false; if (m_NativeWindow)
return false;
m_managed = true; m_managed = true;
@ -293,7 +302,8 @@ bool GSWndWGL::Create(const std::string& title, int w, int h)
std::wstring tmp = std::wstring(title.begin(), title.end()); std::wstring tmp = std::wstring(title.begin(), title.end());
m_NativeWindow = CreateWindow(wc.lpszClassName, tmp.c_str(), style, r.left, r.top, r.width(), r.height(), NULL, NULL, wc.hInstance, (LPVOID)this); m_NativeWindow = CreateWindow(wc.lpszClassName, tmp.c_str(), style, r.left, r.top, r.width(), r.height(), NULL, NULL, wc.hInstance, (LPVOID)this);
if (m_NativeWindow == NULL) return false; if (m_NativeWindow == NULL)
return false;
OpenWGLDisplay(); OpenWGLDisplay();
@ -327,7 +337,8 @@ void* GSWndWGL::GetProcAddress(const char* name, bool opt)
ptr = (void *)GetProcAddress(module, name); ptr = (void *)GetProcAddress(module, name);
} }
#endif #endif
if (ptr == NULL) { if (ptr == NULL)
{
if (theApp.GetConfigB("debug_opengl")) if (theApp.GetConfigB("debug_opengl"))
fprintf(stderr, "Failed to find %s\n", name); fprintf(stderr, "Failed to find %s\n", name);
@ -344,7 +355,8 @@ void GSWndWGL::SetSwapInterval()
// m_swapinterval uses an integer as parameter // m_swapinterval uses an integer as parameter
// 0 -> disable vsync // 0 -> disable vsync
// n -> wait n frame // n -> wait n frame
if (m_swapinterval) m_swapinterval(m_vsync); if (m_swapinterval)
m_swapinterval(m_vsync);
} }
void GSWndWGL::Flip() void GSWndWGL::Flip()
@ -357,7 +369,8 @@ void GSWndWGL::Flip()
void GSWndWGL::Show() void GSWndWGL::Show()
{ {
if (!m_managed) return; if (!m_managed)
return;
// Used by GSReplay // Used by GSReplay
SetForegroundWindow(m_NativeWindow); SetForegroundWindow(m_NativeWindow);
@ -378,7 +391,8 @@ void GSWndWGL::HideFrame()
bool GSWndWGL::SetWindowText(const char* title) bool GSWndWGL::SetWindowText(const char* title)
{ {
if (!m_managed) return false; if (!m_managed)
return false;
const size_t tmp_size = strlen(title) + 1; const size_t tmp_size = strlen(title) + 1;
std::wstring tmp(tmp_size, L'#'); std::wstring tmp(tmp_size, L'#');

View File

@ -45,7 +45,7 @@ class GSWndWGL : public GSWndGL
public: public:
GSWndWGL(); GSWndWGL();
virtual ~GSWndWGL() {}; virtual ~GSWndWGL() {}
bool Create(const std::string& title, int w, int h); bool Create(const std::string& title, int w, int h);
bool Attach(void* handle, bool managed = true); bool Attach(void* handle, bool managed = true);

Some files were not shown because too many files have changed in this diff Show More