diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index 8ce51f1e01..c8bffcfb6f 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -95,6 +95,7 @@ PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines = NU PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv = NULL; PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline = NULL; PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog = NULL; +PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary = NULL; // NO GL4.1 PFNGLUSEPROGRAMPROC gl_UseProgram = NULL; PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog = NULL; diff --git a/plugins/GSdx/GLLoader.h b/plugins/GSdx/GLLoader.h index c87a2b3560..ea3d5a43df 100644 --- a/plugins/GSdx/GLLoader.h +++ b/plugins/GSdx/GLLoader.h @@ -278,6 +278,7 @@ extern PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines; extern PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv; extern PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline; extern PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog; +extern PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary; // NO GL4.1 extern PFNGLUSEPROGRAMPROC gl_UseProgram; extern PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog; diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 7dce676eb9..09bb1765ad 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -1532,7 +1532,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) } if (s_gs->m_wnd == NULL) return; - { + if (theApp.GetConfig("debug_glsl_shader", 0) == 2) { + dynamic_cast(s_gs->m_dev)->SelfShaderTest(); + return; + } + + { // Read .gs content std::string f(lpszCmdLine); #ifdef LZMA_SUPPORTED GSDumpFile* file = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 255998c25e..0a5bd0cc0b 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -669,6 +669,187 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); } +void GSDeviceOGL::SelfShaderTest() +{ +#define RUN_TEST \ + do { \ + GLuint p = CompilePS(sel); \ + nb_shader++; \ + perf += m_shader->DumpAsm(file, p); \ + m_shader->Delete(p); \ + } while(0); + +#define PRINT_TEST(s) \ + do { \ + fprintf(stderr, "%s %d instructions for %d shaders (mean of %4.2f)\n", \ + s, perf, nb_shader, (float)perf/(float)nb_shader); \ + all += perf; \ + perf = 0; \ + nb_shader = 0; \ + } while(0); + + int nb_shader = 0; + int perf = 0; + int all = 0; + // Test: SW blending + for (int colclip = 0; colclip < 4; colclip += 3) { + for (int fmt = 0; fmt < 3; fmt++) { + for (int i = 0; i < 3; i++) { + PSSelector sel; + sel.atst = 1; + sel.tfx = 4; + + int ib = (i + 1) % 3; +#if 1 + sel.blend = i*5; +#else + sel.blend_a = i; + sel.blend_b = ib;; + sel.blend_c = i; + sel.blend_d = i; +#endif + sel.colclip = colclip; + sel.dfmt = fmt; + + std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm", + i, ib, i, i, colclip, fmt); + RUN_TEST; + } + } + } + PRINT_TEST("Blend"); + + // Test: alpha test + for (int atst = 0; atst < 8; atst++) { + PSSelector sel; + sel.tfx = 4; + + sel.atst = atst; + std::string file = format("Shader_Atst_%d.glsl.asm", atst); + RUN_TEST; + } + PRINT_TEST("Alpha Tst"); + + // Test: fbmask/fog/shuffle/read_ba + for (int read_ba = 0; read_ba < 2; read_ba++) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.fog = 1; + sel.fbmask = 1; + sel.shuffle = 1; + sel.read_ba = read_ba; + + std::string file = format("Shader_Fog__Fbmask__Shuffle__Read_ba_%d.glsl.asm", read_ba); + RUN_TEST; + } + PRINT_TEST("Fbmask/fog/shuffle/read_ba"); + + // Test: Date + for (int date = 1; date < 7; date++) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.date = date; + std::string file = format("Shader_Date_%d.glsl.asm", date); + RUN_TEST; + } + PRINT_TEST("Date"); + + // Test: FBA + for (int fmt = 0; fmt < 3; fmt++) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.fba = 1; + sel.dfmt = fmt; + sel.clr1 = 1; + std::string file = format("Shader_Fba__Clr1__Dfmt_%d.glsl.asm", fmt); + RUN_TEST; + } + PRINT_TEST("Fba/Clr1/Dfmt"); + + // Test: Fst/Tc/IIP + { + PSSelector sel; + sel.tfx = 1; + sel.atst = 1; + + sel.fst = 0; + sel.iip = 1; + sel.tcoffsethack = 1; + + std::string file = format("Shader_Fst__TC__Iip.glsl.asm"); + RUN_TEST; + } + PRINT_TEST("Fst/Tc/IIp"); + + // Test: Colclip + for (int colclip = 0; colclip < 3; colclip += 1) { + PSSelector sel; + sel.tfx = 4; + sel.atst = 1; + + sel.colclip = colclip; + std::string file = format("Shader_Colclip_%d.glsl.asm", colclip); + RUN_TEST; + } + PRINT_TEST("Colclip"); + + // Test: tfx/tcc + for (int tfx = 0; tfx < 5; tfx++) { + for (int tcc = 0; tcc < 2; tcc++) { + PSSelector sel; + sel.atst = 1; + sel.fst = 1; + + sel.tfx = tfx; + sel.tcc = tcc; + std::string file = format("Shader_Tfx_%d__Tcc_%d.glsl.asm", tfx, tcc); + RUN_TEST; + } + } + PRINT_TEST("Tfx/Tcc"); + + // Test: Texture Sampling + for (int fmt = 0; fmt < 8; fmt++) { + if ((fmt & 3) == 3) continue; + + for (int ltf = 0; ltf < 2; ltf++) { + for (int aem = 0; aem < 2; aem++) { + for (int ifmt = 0; ifmt < 3; ifmt++) { + for (int wms = 1; wms < 4; wms++) { + for (int wmt = 1; wmt < 4; wmt++) { + PSSelector sel; + sel.atst = 1; + sel.tfx = 1; + + sel.ltf = ltf; + sel.aem = aem; + sel.fmt = fmt; + sel.ifmt = ifmt; + sel.wms = wms; + sel.wmt = wmt; + std::string file = format("Shader_Ltf_%d__Aem_%d__Fmt_%d__Ifmt_%d__Wms_%d__Wmt_%d.glsl.asm", + ltf, aem, fmt, ifmt, wms, wmt); + RUN_TEST; + } + } + } + } + } + } + PRINT_TEST("Texture Sampling"); + + fprintf(stderr, "\nTotal %d\n", all); + +#undef RUN_TEST +#undef PRINT_TEST +} + GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) { return GSDevice::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 62e72e2662..8b249e4da3 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -658,6 +658,8 @@ class GSDeviceOGL : public GSDevice GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel); GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix); + void SelfShaderTest(); + void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel); diff --git a/plugins/GSdx/GSShaderOGL.cpp b/plugins/GSdx/GSShaderOGL.cpp index 61f0f437cf..19759c05f8 100644 --- a/plugins/GSdx/GSShaderOGL.cpp +++ b/plugins/GSdx/GSShaderOGL.cpp @@ -393,6 +393,65 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent return program; } +// This function will get the binary program. Normally it must be used a caching +// solution but Nvidia also incorporates the ASM dump. Asm is nice because it allow +// to have an overview of the program performance based on the instruction number +// Note: initially I was using cg offline compiler but it doesn't support latest +// GLSL improvement (unfortunately). +int GSShaderOGL::DumpAsm(const std::string& file, GLuint p) +{ + if (!GLLoader::nvidia_buggy_driver) return 0; + + GLint binaryLength; + gl_GetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength); + + char* binary = new char[binaryLength+4]; + GLenum binaryFormat; + gl_GetProgramBinary(p, binaryLength, NULL, &binaryFormat, binary); + + FILE* outfile = fopen(file.c_str(), "w"); + ASSERT(outfile); + + // Search the magic number "!!" + int asm_ = 0; + while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) { + asm_ += 1; + } + + int instructions = -1; + if (asm_ < binaryLength) { + // Now print asm as text + char* asm_txt = strtok(&binary[asm_], "\n"); + while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) { + if (strncmp(asm_txt, "OUT", 3) == 0) { + instructions = 0; + } else if (instructions >= 0) { + if (instructions == 0) + fprintf(outfile, "\n"); + instructions++; + } + + fprintf(outfile, "%s\n", asm_txt); + asm_txt = strtok(NULL, "\n"); + } + fprintf(outfile, "\nFound %d instructions\n", instructions); + } + fclose(outfile); + + if (instructions < 0) { + // RAW dump in case of error + fprintf(stderr, "Error: failed to find the number of instructions!\n"); + outfile = fopen(file.c_str(), "wb"); + fwrite(binary, binaryLength, 1, outfile); + fclose(outfile); + ASSERT(0); + } + + delete[] binary; + + return instructions; +} + void GSShaderOGL::Delete(GLuint s) { if (GLLoader::found_GL_ARB_separate_shader_objects) { diff --git a/plugins/GSdx/GSShaderOGL.h b/plugins/GSdx/GSShaderOGL.h index 364858ad3b..92e781dabc 100644 --- a/plugins/GSdx/GSShaderOGL.h +++ b/plugins/GSdx/GSShaderOGL.h @@ -55,5 +55,8 @@ class GSShaderOGL { void UseProgram(); GLuint Compile(const std::string& glsl_file, const std::string& entry, GLenum type, const char* glsl_h_code, const std::string& macro_sel = ""); + + int DumpAsm(const std::string& file, GLuint p); + void Delete(GLuint s); }; diff --git a/plugins/GSdx/GSWnd.cpp b/plugins/GSdx/GSWnd.cpp index d92a2ec519..abcf4ed42f 100644 --- a/plugins/GSdx/GSWnd.cpp +++ b/plugins/GSdx/GSWnd.cpp @@ -93,6 +93,7 @@ void GSWndGL::PopulateGlFunction() *(void**)&(gl_ValidateProgramPipeline) = GetProcAddress("glValidateProgramPipeline", true); *(void**)&(gl_UseProgramStages) = GetProcAddress("glUseProgramStages", true); *(void**)&(gl_ProgramUniform1i) = GetProcAddress("glProgramUniform1i", true); // but no GL4.2 + *(void**)&(gl_GetProgramBinary) = GetProcAddress("glGetProgramBinary", true); // NO GL4.1 *(void**)&(gl_DeleteProgram) = GetProcAddress("glDeleteProgram"); *(void**)&(gl_DeleteShader) = GetProcAddress("glDeleteShader");