From e245b27c9715d9e16d6da599769000af18f5ebf4 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 15 Jul 2015 08:59:16 +0200 Subject: [PATCH] gsdx-ogl-debug: allow to dump various ps shader Nvidia allows to get the ASM of the shader of the compiled shader. It is useful to check the performance. It also allow me to compile most of shader code path for QA Dump is enabled in linux replayer + debug_glsl_shader = 2 --- plugins/GSdx/GLLoader.cpp | 1 + plugins/GSdx/GLLoader.h | 1 + plugins/GSdx/GS.cpp | 7 +- plugins/GSdx/GSDeviceOGL.cpp | 181 +++++++++++++++++++++++++++++++++++ plugins/GSdx/GSDeviceOGL.h | 2 + plugins/GSdx/GSShaderOGL.cpp | 59 ++++++++++++ plugins/GSdx/GSShaderOGL.h | 3 + plugins/GSdx/GSWnd.cpp | 1 + 8 files changed, 254 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index 8ce51f1e01..c8bffcfb6f 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -95,6 +95,7 @@ PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines = NU PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv = NULL; PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline = NULL; PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog = NULL; +PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary = NULL; // NO GL4.1 PFNGLUSEPROGRAMPROC gl_UseProgram = NULL; PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog = NULL; diff --git a/plugins/GSdx/GLLoader.h b/plugins/GSdx/GLLoader.h index c87a2b3560..ea3d5a43df 100644 --- a/plugins/GSdx/GLLoader.h +++ b/plugins/GSdx/GLLoader.h @@ -278,6 +278,7 @@ extern PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines; extern PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv; extern PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline; extern PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog; +extern PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary; // NO GL4.1 extern PFNGLUSEPROGRAMPROC gl_UseProgram; extern PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog; diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 7dce676eb9..09bb1765ad 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -1532,7 +1532,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) } if (s_gs->m_wnd == NULL) return; - { + if (theApp.GetConfig("debug_glsl_shader", 0) == 2) { + dynamic_cast(s_gs->m_dev)->SelfShaderTest(); + return; + } + + { // Read .gs content std::string f(lpszCmdLine); #ifdef LZMA_SUPPORTED GSDumpFile* file = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 255998c25e..0a5bd0cc0b 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -669,6 +669,187 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); } +void GSDeviceOGL::SelfShaderTest() +{ +#define RUN_TEST \ + do { \ + GLuint p = CompilePS(sel); \ + nb_shader++; \ + perf += m_shader->DumpAsm(file, p); \ + m_shader->Delete(p); \ + } while(0); + +#define PRINT_TEST(s) \ + do { \ + fprintf(stderr, "%s %d instructions for %d shaders (mean of %4.2f)\n", \ + s, perf, nb_shader, (float)perf/(float)nb_shader); \ + all += perf; \ + perf = 0; \ + nb_shader = 0; \ + } while(0); + + int nb_shader = 0; + int perf = 0; + int all = 0; + // Test: SW blending + for (int colclip = 0; colclip < 4; colclip += 3) { + for (int fmt = 0; fmt < 3; fmt++) { + for (int i = 0; i < 3; i++) { + PSSelector sel; + sel.atst = 1; + sel.tfx = 4; + + int ib = (i + 1) % 3; +#if 1 + sel.blend = i*5; +#else + sel.blend_a = i; + sel.blend_b = ib;; + sel.blend_c = i; + sel.blend_d = i; +#endif + sel.colclip = colclip; + sel.dfmt = fmt; + + std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm", + i, ib, i, i, colclip, fmt); + RUN_TEST; + } + } + } + PRINT_TEST("Blend"); + + // Test: alpha test + for (int atst = 0; atst < 8; atst++) { + PSSelector sel; + sel.tfx = 4; + + sel.atst = atst; + std::string file = format("Shader_Atst_%d.glsl.asm", atst); + RUN_TEST; + } + PRINT_TEST("Alpha Tst"); + + // Test: fbmask/fog/shuffle/read_ba + for (int read_ba = 0; read_ba < 2; read_ba++) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.fog = 1; + sel.fbmask = 1; + sel.shuffle = 1; + sel.read_ba = read_ba; + + std::string file = format("Shader_Fog__Fbmask__Shuffle__Read_ba_%d.glsl.asm", read_ba); + RUN_TEST; + } + PRINT_TEST("Fbmask/fog/shuffle/read_ba"); + + // Test: Date + for (int date = 1; date < 7; date++) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.date = date; + std::string file = format("Shader_Date_%d.glsl.asm", date); + RUN_TEST; + } + PRINT_TEST("Date"); + + // Test: FBA + for (int fmt = 0; fmt < 3; fmt++) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.fba = 1; + sel.dfmt = fmt; + sel.clr1 = 1; + std::string file = format("Shader_Fba__Clr1__Dfmt_%d.glsl.asm", fmt); + RUN_TEST; + } + PRINT_TEST("Fba/Clr1/Dfmt"); + + // Test: Fst/Tc/IIP + { + PSSelector sel; + sel.tfx = 1; + sel.atst = 1; + + sel.fst = 0; + sel.iip = 1; + sel.tcoffsethack = 1; + + std::string file = format("Shader_Fst__TC__Iip.glsl.asm"); + RUN_TEST; + } + PRINT_TEST("Fst/Tc/IIp"); + + // Test: Colclip + for (int colclip = 0; colclip < 3; colclip += 1) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.colclip = colclip; + std::string file = format("Shader_Colclip_%d.glsl.asm", colclip); + RUN_TEST; + } + PRINT_TEST("Colclip"); + + // Test: tfx/tcc + for (int tfx = 0; tfx < 5; tfx++) { + for (int tcc = 0; tcc < 2; tcc++) { + PSSelector sel; + sel.atst = 1; + sel.fst = 1; + + sel.tfx = tfx; + sel.tcc = tcc; + std::string file = format("Shader_Tfx_%d__Tcc_%d.glsl.asm", tfx, tcc); + RUN_TEST; + } + } + PRINT_TEST("Tfx/Tcc"); + + // Test: Texture Sampling + for (int fmt = 0; fmt < 8; fmt++) { + if ((fmt & 3) == 3) continue; + + for (int ltf = 0; ltf < 2; ltf++) { + for (int aem = 0; aem < 2; aem++) { + for (int ifmt = 0; ifmt < 3; ifmt++) { + for (int wms = 1; wms < 4; wms++) { + for (int wmt = 1; wmt < 4; wmt++) { + PSSelector sel; + sel.atst = 1; + sel.tfx = 1; + + sel.ltf = ltf; + sel.aem = aem; + sel.fmt = fmt; + sel.ifmt = ifmt; + sel.wms = wms; + sel.wmt = wmt; + std::string file = format("Shader_Ltf_%d__Aem_%d__Fmt_%d__Ifmt_%d__Wms_%d__Wmt_%d.glsl.asm", + ltf, aem, fmt, ifmt, wms, wmt); + RUN_TEST; + } + } + } + } + } + } + PRINT_TEST("Texture Sampling"); + + fprintf(stderr, "\nTotal %d\n", all); + +#undef RUN_TEST +#undef PRINT_TEST +} + GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) { return GSDevice::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 62e72e2662..8b249e4da3 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -658,6 +658,8 @@ class GSDeviceOGL : public GSDevice GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel); GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix); + void SelfShaderTest(); + void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel); diff --git a/plugins/GSdx/GSShaderOGL.cpp b/plugins/GSdx/GSShaderOGL.cpp index 61f0f437cf..19759c05f8 100644 --- a/plugins/GSdx/GSShaderOGL.cpp +++ b/plugins/GSdx/GSShaderOGL.cpp @@ -393,6 +393,65 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent return program; } +// This function will get the binary program. Normally it must be used a caching +// solution but Nvidia also incorporates the ASM dump. Asm is nice because it allow +// to have an overview of the program performance based on the instruction number +// Note: initially I was using cg offline compiler but it doesn't support latest +// GLSL improvement (unfortunately). +int GSShaderOGL::DumpAsm(const std::string& file, GLuint p) +{ + if (!GLLoader::nvidia_buggy_driver) return 0; + + GLint binaryLength; + gl_GetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength); + + char* binary = new char[binaryLength+4]; + GLenum binaryFormat; + gl_GetProgramBinary(p, binaryLength, NULL, &binaryFormat, binary); + + FILE* outfile = fopen(file.c_str(), "w"); + ASSERT(outfile); + + // Search the magic number "!!" + int asm_ = 0; + while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) { + asm_ += 1; + } + + int instructions = -1; + if (asm_ < binaryLength) { + // Now print asm as text + char* asm_txt = strtok(&binary[asm_], "\n"); + while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) { + if (strncmp(asm_txt, "OUT", 3) == 0) { + instructions = 0; + } else if (instructions >= 0) { + if (instructions == 0) + fprintf(outfile, "\n"); + instructions++; + } + + fprintf(outfile, "%s\n", asm_txt); + asm_txt = strtok(NULL, "\n"); + } + fprintf(outfile, "\nFound %d instructions\n", instructions); + } + fclose(outfile); + + if (instructions < 0) { + // RAW dump in case of error + fprintf(stderr, "Error: failed to find the number of instructions!\n"); + outfile = fopen(file.c_str(), "wb"); + fwrite(binary, binaryLength, 1, outfile); + fclose(outfile); + ASSERT(0); + } + + delete[] binary; + + return instructions; +} + void GSShaderOGL::Delete(GLuint s) { if (GLLoader::found_GL_ARB_separate_shader_objects) { diff --git a/plugins/GSdx/GSShaderOGL.h b/plugins/GSdx/GSShaderOGL.h index 364858ad3b..92e781dabc 100644 --- a/plugins/GSdx/GSShaderOGL.h +++ b/plugins/GSdx/GSShaderOGL.h @@ -55,5 +55,8 @@ class GSShaderOGL { void UseProgram(); GLuint Compile(const std::string& glsl_file, const std::string& entry, GLenum type, const char* glsl_h_code, const std::string& macro_sel = ""); + + int DumpAsm(const std::string& file, GLuint p); + void Delete(GLuint s); }; diff --git a/plugins/GSdx/GSWnd.cpp b/plugins/GSdx/GSWnd.cpp index d92a2ec519..abcf4ed42f 100644 --- a/plugins/GSdx/GSWnd.cpp +++ b/plugins/GSdx/GSWnd.cpp @@ -93,6 +93,7 @@ void GSWndGL::PopulateGlFunction() *(void**)&(gl_ValidateProgramPipeline) = GetProcAddress("glValidateProgramPipeline", true); *(void**)&(gl_UseProgramStages) = GetProcAddress("glUseProgramStages", true); *(void**)&(gl_ProgramUniform1i) = GetProcAddress("glProgramUniform1i", true); // but no GL4.2 + *(void**)&(gl_GetProgramBinary) = GetProcAddress("glGetProgramBinary", true); // NO GL4.1 *(void**)&(gl_DeleteProgram) = GetProcAddress("glDeleteProgram"); *(void**)&(gl_DeleteShader) = GetProcAddress("glDeleteShader");