mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl-debug: allow dumping various PS shaders
Nvidia allows retrieving the ASM of the compiled shader, which is useful for checking performance. It also allows me to compile most of the shader code paths for QA. The dump is enabled in the Linux replayer with debug_glsl_shader = 2.
This commit is contained in:
parent
344030cbe4
commit
e245b27c97
|
@ -95,6 +95,7 @@ PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines = NU
|
||||||
PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv = NULL;
|
PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv = NULL;
|
||||||
PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline = NULL;
|
PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline = NULL;
|
||||||
PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog = NULL;
|
PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog = NULL;
|
||||||
|
PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary = NULL;
|
||||||
// NO GL4.1
|
// NO GL4.1
|
||||||
PFNGLUSEPROGRAMPROC gl_UseProgram = NULL;
|
PFNGLUSEPROGRAMPROC gl_UseProgram = NULL;
|
||||||
PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog = NULL;
|
PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog = NULL;
|
||||||
|
|
|
@ -278,6 +278,7 @@ extern PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines;
|
||||||
extern PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv;
|
extern PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv;
|
||||||
extern PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline;
|
extern PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline;
|
||||||
extern PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog;
|
extern PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog;
|
||||||
|
extern PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary;
|
||||||
// NO GL4.1
|
// NO GL4.1
|
||||||
extern PFNGLUSEPROGRAMPROC gl_UseProgram;
|
extern PFNGLUSEPROGRAMPROC gl_UseProgram;
|
||||||
extern PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog;
|
extern PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog;
|
||||||
|
|
|
@ -1532,7 +1532,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
|
||||||
}
|
}
|
||||||
if (s_gs->m_wnd == NULL) return;
|
if (s_gs->m_wnd == NULL) return;
|
||||||
|
|
||||||
{
|
if (theApp.GetConfig("debug_glsl_shader", 0) == 2) {
|
||||||
|
dynamic_cast<GSDeviceOGL*>(s_gs->m_dev)->SelfShaderTest();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
{ // Read .gs content
|
||||||
std::string f(lpszCmdLine);
|
std::string f(lpszCmdLine);
|
||||||
#ifdef LZMA_SUPPORTED
|
#ifdef LZMA_SUPPORTED
|
||||||
GSDumpFile* file = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0)
|
GSDumpFile* file = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0)
|
||||||
|
|
|
@ -669,6 +669,187 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||||
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GSDeviceOGL::SelfShaderTest()
|
||||||
|
{
|
||||||
|
#define RUN_TEST \
|
||||||
|
do { \
|
||||||
|
GLuint p = CompilePS(sel); \
|
||||||
|
nb_shader++; \
|
||||||
|
perf += m_shader->DumpAsm(file, p); \
|
||||||
|
m_shader->Delete(p); \
|
||||||
|
} while(0);
|
||||||
|
|
||||||
|
#define PRINT_TEST(s) \
|
||||||
|
do { \
|
||||||
|
fprintf(stderr, "%s %d instructions for %d shaders (mean of %4.2f)\n", \
|
||||||
|
s, perf, nb_shader, (float)perf/(float)nb_shader); \
|
||||||
|
all += perf; \
|
||||||
|
perf = 0; \
|
||||||
|
nb_shader = 0; \
|
||||||
|
} while(0);
|
||||||
|
|
||||||
|
int nb_shader = 0;
|
||||||
|
int perf = 0;
|
||||||
|
int all = 0;
|
||||||
|
// Test: SW blending
|
||||||
|
for (int colclip = 0; colclip < 4; colclip += 3) {
|
||||||
|
for (int fmt = 0; fmt < 3; fmt++) {
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.atst = 1;
|
||||||
|
sel.tfx = 4;
|
||||||
|
|
||||||
|
int ib = (i + 1) % 3;
|
||||||
|
#if 1
|
||||||
|
sel.blend = i*5;
|
||||||
|
#else
|
||||||
|
sel.blend_a = i;
|
||||||
|
sel.blend_b = ib;;
|
||||||
|
sel.blend_c = i;
|
||||||
|
sel.blend_d = i;
|
||||||
|
#endif
|
||||||
|
sel.colclip = colclip;
|
||||||
|
sel.dfmt = fmt;
|
||||||
|
|
||||||
|
std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm",
|
||||||
|
i, ib, i, i, colclip, fmt);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PRINT_TEST("Blend");
|
||||||
|
|
||||||
|
// Test: alpha test
|
||||||
|
for (int atst = 0; atst < 8; atst++) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.tfx = 4;
|
||||||
|
|
||||||
|
sel.atst = atst;
|
||||||
|
std::string file = format("Shader_Atst_%d.glsl.asm", atst);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
PRINT_TEST("Alpha Tst");
|
||||||
|
|
||||||
|
// Test: fbmask/fog/shuffle/read_ba
|
||||||
|
for (int read_ba = 0; read_ba < 2; read_ba++) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.tfx = 4;
|
||||||
|
sel.atst = 1;
|
||||||
|
|
||||||
|
sel.fog = 1;
|
||||||
|
sel.fbmask = 1;
|
||||||
|
sel.shuffle = 1;
|
||||||
|
sel.read_ba = read_ba;
|
||||||
|
|
||||||
|
std::string file = format("Shader_Fog__Fbmask__Shuffle__Read_ba_%d.glsl.asm", read_ba);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
PRINT_TEST("Fbmask/fog/shuffle/read_ba");
|
||||||
|
|
||||||
|
// Test: Date
|
||||||
|
for (int date = 1; date < 7; date++) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.tfx = 4;
|
||||||
|
sel.atst = 1;
|
||||||
|
|
||||||
|
sel.date = date;
|
||||||
|
std::string file = format("Shader_Date_%d.glsl.asm", date);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
PRINT_TEST("Date");
|
||||||
|
|
||||||
|
// Test: FBA
|
||||||
|
for (int fmt = 0; fmt < 3; fmt++) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.tfx = 4;
|
||||||
|
sel.atst = 1;
|
||||||
|
|
||||||
|
sel.fba = 1;
|
||||||
|
sel.dfmt = fmt;
|
||||||
|
sel.clr1 = 1;
|
||||||
|
std::string file = format("Shader_Fba__Clr1__Dfmt_%d.glsl.asm", fmt);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
PRINT_TEST("Fba/Clr1/Dfmt");
|
||||||
|
|
||||||
|
// Test: Fst/Tc/IIP
|
||||||
|
{
|
||||||
|
PSSelector sel;
|
||||||
|
sel.tfx = 1;
|
||||||
|
sel.atst = 1;
|
||||||
|
|
||||||
|
sel.fst = 0;
|
||||||
|
sel.iip = 1;
|
||||||
|
sel.tcoffsethack = 1;
|
||||||
|
|
||||||
|
std::string file = format("Shader_Fst__TC__Iip.glsl.asm");
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
PRINT_TEST("Fst/Tc/IIp");
|
||||||
|
|
||||||
|
// Test: Colclip
|
||||||
|
for (int colclip = 0; colclip < 3; colclip += 1) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.tfx = 4;
|
||||||
|
sel.atst = 1;
|
||||||
|
|
||||||
|
sel.colclip = colclip;
|
||||||
|
std::string file = format("Shader_Colclip_%d.glsl.asm", colclip);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
PRINT_TEST("Colclip");
|
||||||
|
|
||||||
|
// Test: tfx/tcc
|
||||||
|
for (int tfx = 0; tfx < 5; tfx++) {
|
||||||
|
for (int tcc = 0; tcc < 2; tcc++) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.atst = 1;
|
||||||
|
sel.fst = 1;
|
||||||
|
|
||||||
|
sel.tfx = tfx;
|
||||||
|
sel.tcc = tcc;
|
||||||
|
std::string file = format("Shader_Tfx_%d__Tcc_%d.glsl.asm", tfx, tcc);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PRINT_TEST("Tfx/Tcc");
|
||||||
|
|
||||||
|
// Test: Texture Sampling
|
||||||
|
for (int fmt = 0; fmt < 8; fmt++) {
|
||||||
|
if ((fmt & 3) == 3) continue;
|
||||||
|
|
||||||
|
for (int ltf = 0; ltf < 2; ltf++) {
|
||||||
|
for (int aem = 0; aem < 2; aem++) {
|
||||||
|
for (int ifmt = 0; ifmt < 3; ifmt++) {
|
||||||
|
for (int wms = 1; wms < 4; wms++) {
|
||||||
|
for (int wmt = 1; wmt < 4; wmt++) {
|
||||||
|
PSSelector sel;
|
||||||
|
sel.atst = 1;
|
||||||
|
sel.tfx = 1;
|
||||||
|
|
||||||
|
sel.ltf = ltf;
|
||||||
|
sel.aem = aem;
|
||||||
|
sel.fmt = fmt;
|
||||||
|
sel.ifmt = ifmt;
|
||||||
|
sel.wms = wms;
|
||||||
|
sel.wmt = wmt;
|
||||||
|
std::string file = format("Shader_Ltf_%d__Aem_%d__Fmt_%d__Ifmt_%d__Wms_%d__Wmt_%d.glsl.asm",
|
||||||
|
ltf, aem, fmt, ifmt, wms, wmt);
|
||||||
|
RUN_TEST;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PRINT_TEST("Texture Sampling");
|
||||||
|
|
||||||
|
fprintf(stderr, "\nTotal %d\n", all);
|
||||||
|
|
||||||
|
#undef RUN_TEST
|
||||||
|
#undef PRINT_TEST
|
||||||
|
}
|
||||||
|
|
||||||
GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format)
|
GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format)
|
||||||
{
|
{
|
||||||
return GSDevice::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8);
|
return GSDevice::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8);
|
||||||
|
|
|
@ -658,6 +658,8 @@ class GSDeviceOGL : public GSDevice
|
||||||
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
|
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
|
||||||
GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix);
|
GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix);
|
||||||
|
|
||||||
|
void SelfShaderTest();
|
||||||
|
|
||||||
|
|
||||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||||
void SetupVS(VSSelector sel);
|
void SetupVS(VSSelector sel);
|
||||||
|
|
|
@ -393,6 +393,65 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This function will get the binary program. Normally it must be used a caching
|
||||||
|
// solution but Nvidia also incorporates the ASM dump. Asm is nice because it allow
|
||||||
|
// to have an overview of the program performance based on the instruction number
|
||||||
|
// Note: initially I was using cg offline compiler but it doesn't support latest
|
||||||
|
// GLSL improvement (unfortunately).
|
||||||
|
int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
|
||||||
|
{
|
||||||
|
if (!GLLoader::nvidia_buggy_driver) return 0;
|
||||||
|
|
||||||
|
GLint binaryLength;
|
||||||
|
gl_GetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
|
||||||
|
|
||||||
|
char* binary = new char[binaryLength+4];
|
||||||
|
GLenum binaryFormat;
|
||||||
|
gl_GetProgramBinary(p, binaryLength, NULL, &binaryFormat, binary);
|
||||||
|
|
||||||
|
FILE* outfile = fopen(file.c_str(), "w");
|
||||||
|
ASSERT(outfile);
|
||||||
|
|
||||||
|
// Search the magic number "!!"
|
||||||
|
int asm_ = 0;
|
||||||
|
while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) {
|
||||||
|
asm_ += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int instructions = -1;
|
||||||
|
if (asm_ < binaryLength) {
|
||||||
|
// Now print asm as text
|
||||||
|
char* asm_txt = strtok(&binary[asm_], "\n");
|
||||||
|
while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) {
|
||||||
|
if (strncmp(asm_txt, "OUT", 3) == 0) {
|
||||||
|
instructions = 0;
|
||||||
|
} else if (instructions >= 0) {
|
||||||
|
if (instructions == 0)
|
||||||
|
fprintf(outfile, "\n");
|
||||||
|
instructions++;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(outfile, "%s\n", asm_txt);
|
||||||
|
asm_txt = strtok(NULL, "\n");
|
||||||
|
}
|
||||||
|
fprintf(outfile, "\nFound %d instructions\n", instructions);
|
||||||
|
}
|
||||||
|
fclose(outfile);
|
||||||
|
|
||||||
|
if (instructions < 0) {
|
||||||
|
// RAW dump in case of error
|
||||||
|
fprintf(stderr, "Error: failed to find the number of instructions!\n");
|
||||||
|
outfile = fopen(file.c_str(), "wb");
|
||||||
|
fwrite(binary, binaryLength, 1, outfile);
|
||||||
|
fclose(outfile);
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] binary;
|
||||||
|
|
||||||
|
return instructions;
|
||||||
|
}
|
||||||
|
|
||||||
void GSShaderOGL::Delete(GLuint s)
|
void GSShaderOGL::Delete(GLuint s)
|
||||||
{
|
{
|
||||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||||
|
|
|
@ -55,5 +55,8 @@ class GSShaderOGL {
|
||||||
void UseProgram();
|
void UseProgram();
|
||||||
|
|
||||||
GLuint Compile(const std::string& glsl_file, const std::string& entry, GLenum type, const char* glsl_h_code, const std::string& macro_sel = "");
|
GLuint Compile(const std::string& glsl_file, const std::string& entry, GLenum type, const char* glsl_h_code, const std::string& macro_sel = "");
|
||||||
|
|
||||||
|
int DumpAsm(const std::string& file, GLuint p);
|
||||||
|
|
||||||
void Delete(GLuint s);
|
void Delete(GLuint s);
|
||||||
};
|
};
|
||||||
|
|
|
@ -93,6 +93,7 @@ void GSWndGL::PopulateGlFunction()
|
||||||
*(void**)&(gl_ValidateProgramPipeline) = GetProcAddress("glValidateProgramPipeline", true);
|
*(void**)&(gl_ValidateProgramPipeline) = GetProcAddress("glValidateProgramPipeline", true);
|
||||||
*(void**)&(gl_UseProgramStages) = GetProcAddress("glUseProgramStages", true);
|
*(void**)&(gl_UseProgramStages) = GetProcAddress("glUseProgramStages", true);
|
||||||
*(void**)&(gl_ProgramUniform1i) = GetProcAddress("glProgramUniform1i", true); // but no GL4.2
|
*(void**)&(gl_ProgramUniform1i) = GetProcAddress("glProgramUniform1i", true); // but no GL4.2
|
||||||
|
*(void**)&(gl_GetProgramBinary) = GetProcAddress("glGetProgramBinary", true);
|
||||||
// NO GL4.1
|
// NO GL4.1
|
||||||
*(void**)&(gl_DeleteProgram) = GetProcAddress("glDeleteProgram");
|
*(void**)&(gl_DeleteProgram) = GetProcAddress("glDeleteProgram");
|
||||||
*(void**)&(gl_DeleteShader) = GetProcAddress("glDeleteShader");
|
*(void**)&(gl_DeleteShader) = GetProcAddress("glDeleteShader");
|
||||||
|
|
Loading…
Reference in New Issue