gsdx-ogl-debug: allow dumping various PS shaders

Nvidia allows retrieving the ASM of the compiled shader. It is useful
for checking the performance.

It also allows me to compile most of the shader code paths for QA.

The dump is enabled in the Linux replayer when debug_glsl_shader = 2.
This commit is contained in:
Gregory Hainaut 2015-07-15 08:59:16 +02:00
parent 344030cbe4
commit e245b27c97
8 changed files with 254 additions and 1 deletions

View File

@ -95,6 +95,7 @@ PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines = NU
PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv = NULL;
PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline = NULL;
PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog = NULL;
// Entry point for glGetProgramBinary. It stays NULL until it is resolved at
// runtime (GSWndGL::PopulateGlFunction looks up "glGetProgramBinary").
PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary = NULL;
// NO GL4.1
PFNGLUSEPROGRAMPROC gl_UseProgram = NULL;
PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog = NULL;

View File

@ -278,6 +278,7 @@ extern PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines;
extern PFNGLGETPROGRAMPIPELINEIVPROC gl_GetProgramPipelineiv;
extern PFNGLVALIDATEPROGRAMPIPELINEPROC gl_ValidateProgramPipeline;
extern PFNGLGETPROGRAMPIPELINEINFOLOGPROC gl_GetProgramPipelineInfoLog;
// glGetProgramBinary entry point; GSShaderOGL::DumpAsm uses it to fetch the
// binary of a compiled program.
extern PFNGLGETPROGRAMBINARYPROC gl_GetProgramBinary;
// NO GL4.1
extern PFNGLUSEPROGRAMPROC gl_UseProgram;
extern PFNGLGETSHADERINFOLOGPROC gl_GetShaderInfoLog;

View File

@ -1532,7 +1532,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
}
if (s_gs->m_wnd == NULL) return;
{
if (theApp.GetConfig("debug_glsl_shader", 0) == 2) {
dynamic_cast<GSDeviceOGL*>(s_gs->m_dev)->SelfShaderTest();
return;
}
{ // Read .gs content
std::string f(lpszCmdLine);
#ifdef LZMA_SUPPORTED
GSDumpFile* file = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0)

View File

@ -669,6 +669,187 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
}
// Debug/QA helper: compiles a series of pixel shader variants and dumps the
// ASM of each of them.
//
// For every PSSelector combination below, the shader is compiled with
// CompilePS, its ASM is written to a "Shader_*.glsl.asm" file through
// GSShaderOGL::DumpAsm (whose return value is accumulated as the instruction
// count) and the program is deleted again. After each group of tests a
// summary line is printed on stderr, followed by a grand total at the end.
void GSDeviceOGL::SelfShaderTest()
{
// Compile the shader described by the local `sel`, dump it into the local
// `file` and update the counters. Note: no trailing ';' after while(0) so that
// the macro behaves as a single statement at the call site.
#define RUN_TEST \
	do { \
		GLuint p = CompilePS(sel); \
		nb_shader++; \
		perf += m_shader->DumpAsm(file, p); \
		m_shader->Delete(p); \
	} while(0)

// Print the statistics of the current group, add them to the grand total and
// reset the per-group counters.
#define PRINT_TEST(s) \
	do { \
		fprintf(stderr, "%s %d instructions for %d shaders (mean of %4.2f)\n", \
				s, perf, nb_shader, (float)perf/(float)nb_shader); \
		all += perf; \
		perf = 0; \
		nb_shader = 0; \
	} while(0)

	int nb_shader = 0; // number of shaders compiled in the current group
	int perf = 0;      // instructions reported by DumpAsm for the current group
	int all = 0;       // instructions accumulated over all groups

	// Test: SW blending
	// colclip only takes the values 0 and 3 here
	for (int colclip = 0; colclip < 4; colclip += 3) {
		for (int fmt = 0; fmt < 3; fmt++) {
			for (int i = 0; i < 3; i++) {
				PSSelector sel;
				sel.atst = 1;
				sel.tfx = 4;

				// ib only feeds the file name while the "#if 1" branch is active
				int ib = (i + 1) % 3;
#if 1
				sel.blend = i*5;
#else
				sel.blend_a = i;
				sel.blend_b = ib;
				sel.blend_c = i;
				sel.blend_d = i;
#endif
				sel.colclip = colclip;
				sel.dfmt = fmt;

				std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm",
						i, ib, i, i, colclip, fmt);
				RUN_TEST;
			}
		}
	}
	PRINT_TEST("Blend");

	// Test: alpha test
	for (int atst = 0; atst < 8; atst++) {
		PSSelector sel;
		sel.tfx = 4;

		sel.atst = atst;
		std::string file = format("Shader_Atst_%d.glsl.asm", atst);
		RUN_TEST;
	}
	PRINT_TEST("Alpha Tst");

	// Test: fbmask/fog/shuffle/read_ba
	for (int read_ba = 0; read_ba < 2; read_ba++) {
		PSSelector sel;
		sel.tfx = 4;
		sel.atst = 1;

		sel.fog = 1;
		sel.fbmask = 1;
		sel.shuffle = 1;
		sel.read_ba = read_ba;

		std::string file = format("Shader_Fog__Fbmask__Shuffle__Read_ba_%d.glsl.asm", read_ba);
		RUN_TEST;
	}
	PRINT_TEST("Fbmask/fog/shuffle/read_ba");

	// Test: Date
	for (int date = 1; date < 7; date++) {
		PSSelector sel;
		sel.tfx = 4;
		sel.atst = 1;

		sel.date = date;
		std::string file = format("Shader_Date_%d.glsl.asm", date);
		RUN_TEST;
	}
	PRINT_TEST("Date");

	// Test: FBA
	for (int fmt = 0; fmt < 3; fmt++) {
		PSSelector sel;
		sel.tfx = 4;
		sel.atst = 1;

		sel.fba = 1;
		sel.dfmt = fmt;
		sel.clr1 = 1;
		std::string file = format("Shader_Fba__Clr1__Dfmt_%d.glsl.asm", fmt);
		RUN_TEST;
	}
	PRINT_TEST("Fba/Clr1/Dfmt");

	// Test: Fst/Tc/IIP
	{
		PSSelector sel;
		sel.tfx = 1;
		sel.atst = 1;

		sel.fst = 0;
		sel.iip = 1;
		sel.tcoffsethack = 1;

		std::string file = format("Shader_Fst__TC__Iip.glsl.asm");
		RUN_TEST;
	}
	PRINT_TEST("Fst/Tc/IIp");

	// Test: Colclip
	for (int colclip = 0; colclip < 3; colclip += 1) {
		PSSelector sel;
		sel.tfx = 4;
		sel.atst = 1;

		sel.colclip = colclip;
		std::string file = format("Shader_Colclip_%d.glsl.asm", colclip);
		RUN_TEST;
	}
	PRINT_TEST("Colclip");

	// Test: tfx/tcc
	for (int tfx = 0; tfx < 5; tfx++) {
		for (int tcc = 0; tcc < 2; tcc++) {
			PSSelector sel;
			sel.atst = 1;
			sel.fst = 1;

			sel.tfx = tfx;
			sel.tcc = tcc;
			std::string file = format("Shader_Tfx_%d__Tcc_%d.glsl.asm", tfx, tcc);
			RUN_TEST;
		}
	}
	PRINT_TEST("Tfx/Tcc");

	// Test: Texture Sampling
	for (int fmt = 0; fmt < 8; fmt++) {
		// Formats whose two low bits are both set (3 and 7) are not tested
		if ((fmt & 3) == 3) continue;

		for (int ltf = 0; ltf < 2; ltf++) {
			for (int aem = 0; aem < 2; aem++) {
				for (int ifmt = 0; ifmt < 3; ifmt++) {
					for (int wms = 1; wms < 4; wms++) {
						for (int wmt = 1; wmt < 4; wmt++) {
							PSSelector sel;
							sel.atst = 1;
							sel.tfx = 1;

							sel.ltf = ltf;
							sel.aem = aem;
							sel.fmt = fmt;
							sel.ifmt = ifmt;
							sel.wms = wms;
							sel.wmt = wmt;
							std::string file = format("Shader_Ltf_%d__Aem_%d__Fmt_%d__Ifmt_%d__Wms_%d__Wmt_%d.glsl.asm",
									ltf, aem, fmt, ifmt, wms, wmt);
							RUN_TEST;
						}
					}
				}
			}
		}
	}
	PRINT_TEST("Texture Sampling");

	fprintf(stderr, "\nTotal %d\n", all);

#undef RUN_TEST
#undef PRINT_TEST
}
GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format)
{
return GSDevice::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8);

View File

@ -658,6 +658,8 @@ class GSDeviceOGL : public GSDevice
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix);
// Debug helper: compiles a set of pixel shader selector combinations and
// dumps their ASM through GSShaderOGL::DumpAsm.
void SelfShaderTest();
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel);

View File

@ -393,6 +393,65 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
return program;
}
// Dump the ASM of the program `p` into the text file `file`.
//
// The function retrieves the program binary. A program binary is normally
// meant for a caching solution, but Nvidia also incorporates the ASM dump in
// it. The ASM is nice because it gives an overview of the program performance
// based on the number of instructions.
// Note: initially I was using the cg offline compiler but it doesn't support
// the latest GLSL improvements (unfortunately).
//
// file: path of the output file (overwritten)
// p:    GL program object to dump
//
// Returns the number of instructions counted after the last "OUT" line of the
// ASM. Returns 0 without doing anything when GLLoader::nvidia_buggy_driver is
// not set (presumably this flag identifies the Nvidia driver -- TODO confirm),
// and 0 when the output file can't be created. When no instruction is found,
// the raw binary is written to `file` instead and -1 is returned.
int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
{
	if (!GLLoader::nvidia_buggy_driver) return 0;

	// Keep a sane length even if the query fails and leaves the value untouched
	GLint binaryLength = 0;
	gl_GetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
	if (binaryLength < 0) binaryLength = 0;

	// The buffer is zero-initialized so the padding bytes always terminate the
	// data: the string functions below can't run past the allocation.
	char* binary = new char[binaryLength + 4]();
	GLenum binaryFormat = 0;
	gl_GetProgramBinary(p, binaryLength, NULL, &binaryFormat, binary);

	FILE* outfile = fopen(file.c_str(), "w");
	ASSERT(outfile);
	if (outfile == NULL) {
		// ASSERT is not a protection in every build: bail out cleanly
		fprintf(stderr, "Error: failed to open %s\n", file.c_str());
		delete[] binary;
		return 0;
	}

	// Search the magic number "!!" (both characters must be inside the binary)
	int asm_ = 0;
	while (asm_ + 1 < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) {
		asm_ += 1;
	}

	int instructions = -1;
	if (asm_ + 1 < binaryLength) {
		// Now print asm as text, line by line, until the final "END" line
		// ("ENDIF" lines belong to the program and don't stop the dump).
		// NOTE(review): any other opcode starting with "END" would stop the
		// dump early -- confirm whether the driver can emit one.
		char* asm_txt = strtok(&binary[asm_], "\n");
		while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) {
			if (strncmp(asm_txt, "OUT", 3) == 0) {
				// Lines before the last OUT declaration aren't instructions
				instructions = 0;
			} else if (instructions >= 0) {
				// Separate the declarations from the first instruction
				if (instructions == 0)
					fprintf(outfile, "\n");
				instructions++;
			}

			fprintf(outfile, "%s\n", asm_txt);
			asm_txt = strtok(NULL, "\n");
		}
		fprintf(outfile, "\nFound %d instructions\n", instructions);
	}
	fclose(outfile);

	if (instructions < 0) {
		// RAW dump in case of error
		// Note: strtok replaced the line breaks that it parsed with NUL bytes
		fprintf(stderr, "Error: failed to find the number of instructions!\n");
		outfile = fopen(file.c_str(), "wb");
		if (outfile != NULL) {
			fwrite(binary, binaryLength, 1, outfile);
			fclose(outfile);
		}
		ASSERT(0);
	}

	delete[] binary;

	return instructions;
}
void GSShaderOGL::Delete(GLuint s)
{
if (GLLoader::found_GL_ARB_separate_shader_objects) {

View File

@ -55,5 +55,8 @@ class GSShaderOGL {
void UseProgram();
GLuint Compile(const std::string& glsl_file, const std::string& entry, GLenum type, const char* glsl_h_code, const std::string& macro_sel = "");
// Writes the ASM found in the binary of program p into `file` and returns the
// number of instructions that was counted.
int DumpAsm(const std::string& file, GLuint p);
void Delete(GLuint s);
};

View File

@ -93,6 +93,7 @@ void GSWndGL::PopulateGlFunction()
*(void**)&(gl_ValidateProgramPipeline) = GetProcAddress("glValidateProgramPipeline", true);
*(void**)&(gl_UseProgramStages) = GetProcAddress("glUseProgramStages", true);
*(void**)&(gl_ProgramUniform1i) = GetProcAddress("glProgramUniform1i", true); // but no GL4.2
*(void**)&(gl_GetProgramBinary) = GetProcAddress("glGetProgramBinary", true);
// NO GL4.1
*(void**)&(gl_DeleteProgram) = GetProcAddress("glDeleteProgram");
*(void**)&(gl_DeleteShader) = GetProcAddress("glDeleteShader");