mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl: the proof of concept commit
* GL_ARB_shader_subroutine for perf: fix for Nvidia => add the missing shader declaration. Nvidia got +4 fps on colin3 :) For the moment only 2 PS parameters are supported. The code needs to be extended to support other games that often switch shader programs (like Xenosaga). Requires GL4-class hardware and the option override_GL_ARB_shader_subroutine = 1. Note: strangely, on AMD Linux it is slower! * GL_ARB_shader_image_load_store for accuracy (Date): use a signed integer texture and re-enable color buffer writing. Current status: Amagami_transparency.gs & P3_battle_shadows.gs are now working on Nvidia with a small perf impact. Current implementation detail: 1/ set up the standard stencil as before; 2/ on the remaining pixels, draw once to compute the first primitive that will write a failing alpha value; 3/ do the final draw based on the primitive ID from step 2. Note: I think we would get bad behavior if depth test & mask are enabled on steps 2/3. Note 2: in my limited test case the perf impact was on the CPU. It would be possible to merge steps 1 & 2 to nullify it (it could even be faster, actually); however, it would require more GPU power. Again, this requires GL4-class hardware and the option UserHacks_DateGL4 = 1. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5725 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
452cf72ddf
commit
e01c6cd9ce
|
@ -132,6 +132,7 @@ namespace GLLoader {
|
||||||
bool found_only_gl30 = false; // Drop it when mesa support GLSL330
|
bool found_only_gl30 = false; // Drop it when mesa support GLSL330
|
||||||
bool found_GL_ARB_clear_texture = false; // Don't know if GL3 GPU can support it
|
bool found_GL_ARB_clear_texture = false; // Don't know if GL3 GPU can support it
|
||||||
bool found_GL_ARB_buffer_storage = false;
|
bool found_GL_ARB_buffer_storage = false;
|
||||||
|
bool found_GL_ARB_explicit_uniform_location = false; // need by subroutine
|
||||||
// GL4 hardware
|
// GL4 hardware
|
||||||
bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it
|
bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it
|
||||||
bool found_GL_ARB_gpu_shader5 = false;
|
bool found_GL_ARB_gpu_shader5 = false;
|
||||||
|
@ -233,6 +234,8 @@ namespace GLLoader {
|
||||||
if (ext.compare("GL_ARB_gpu_shader5") == 0) found_GL_ARB_gpu_shader5 = true;
|
if (ext.compare("GL_ARB_gpu_shader5") == 0) found_GL_ARB_gpu_shader5 = true;
|
||||||
if (ext.compare("GL_ARB_shader_image_load_store") == 0) found_GL_ARB_shader_image_load_store = true;
|
if (ext.compare("GL_ARB_shader_image_load_store") == 0) found_GL_ARB_shader_image_load_store = true;
|
||||||
#if 0
|
#if 0
|
||||||
|
// Erratum: on nvidia implementation, gain is very nice : 42.5 fps => 46.5 fps
|
||||||
|
//
|
||||||
// Strangely it doesn't provide the speed boost as expected.
|
// Strangely it doesn't provide the speed boost as expected.
|
||||||
// Note: only atst/colclip was replaced with subroutine for the moment. It replace 2000 program switch on
|
// Note: only atst/colclip was replaced with subroutine for the moment. It replace 2000 program switch on
|
||||||
// colin mcrae 3 by 2100 uniform, but code is slower!
|
// colin mcrae 3 by 2100 uniform, but code is slower!
|
||||||
|
@ -243,6 +246,7 @@ namespace GLLoader {
|
||||||
|
|
||||||
if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true;
|
if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true;
|
||||||
#endif
|
#endif
|
||||||
|
if (ext.compare("GL_ARB_explicit_uniform_location") == 0) found_GL_ARB_explicit_uniform_location = true;
|
||||||
#ifdef GL44 // Need to debug the code first
|
#ifdef GL44 // Need to debug the code first
|
||||||
if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true;
|
if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true;
|
||||||
if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true;
|
if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true;
|
||||||
|
@ -268,6 +272,7 @@ namespace GLLoader {
|
||||||
status &= status_and_override(found_GL_ARB_clear_texture,"GL_ARB_clear_texture");
|
status &= status_and_override(found_GL_ARB_clear_texture,"GL_ARB_clear_texture");
|
||||||
status &= status_and_override(found_GL_ARB_buffer_storage,"GL_ARB_buffer_storage");
|
status &= status_and_override(found_GL_ARB_buffer_storage,"GL_ARB_buffer_storage");
|
||||||
status &= status_and_override(found_GL_ARB_shader_subroutine,"GL_ARB_shader_subroutine");
|
status &= status_and_override(found_GL_ARB_shader_subroutine,"GL_ARB_shader_subroutine");
|
||||||
|
status &= status_and_override(found_GL_ARB_explicit_uniform_location,"GL_ARB_explicit_uniform_location");
|
||||||
|
|
||||||
status &= status_and_override(found_GL_ARB_texture_storage, "GL_ARB_texture_storage", true);
|
status &= status_and_override(found_GL_ARB_texture_storage, "GL_ARB_texture_storage", true);
|
||||||
status &= status_and_override(found_GL_ARB_shading_language_420pack,"GL_ARB_shading_language_420pack");
|
status &= status_and_override(found_GL_ARB_shading_language_420pack,"GL_ARB_shading_language_420pack");
|
||||||
|
|
|
@ -279,4 +279,5 @@ namespace GLLoader {
|
||||||
extern bool found_GL_ARB_buffer_storage;
|
extern bool found_GL_ARB_buffer_storage;
|
||||||
extern bool found_GL_ARB_shader_subroutine;
|
extern bool found_GL_ARB_shader_subroutine;
|
||||||
extern bool found_GL_ARB_bindless_texture;
|
extern bool found_GL_ARB_bindless_texture;
|
||||||
|
extern bool found_GL_ARB_explicit_uniform_location;
|
||||||
}
|
}
|
||||||
|
|
|
@ -826,7 +826,8 @@ EXPORT_C GSgetTitleInfo2(char* dest, size_t length)
|
||||||
{
|
{
|
||||||
string s = "GSdx";
|
string s = "GSdx";
|
||||||
|
|
||||||
if(s_gs != NULL) // TODO: this gets called from a different thread concurrently with GSOpen (on linux)
|
// TODO: this gets called from a different thread concurrently with GSOpen (on linux)
|
||||||
|
if(s_gs == NULL) return;
|
||||||
|
|
||||||
if(s_gs->m_GStitleInfoBuffer[0])
|
if(s_gs->m_GStitleInfoBuffer[0])
|
||||||
{
|
{
|
||||||
|
|
|
@ -108,6 +108,7 @@ GSDeviceOGL::~GSDeviceOGL()
|
||||||
delete m_ps_cb;
|
delete m_ps_cb;
|
||||||
gl_DeleteSamplers(1, &m_palette_ss);
|
gl_DeleteSamplers(1, &m_palette_ss);
|
||||||
delete m_vb;
|
delete m_vb;
|
||||||
|
m_shader->Delete(m_apitrace);
|
||||||
|
|
||||||
for (uint32 key = 0; key < VSSelector::size(); key++) m_shader->Delete(m_vs[key]);
|
for (uint32 key = 0; key < VSSelector::size(); key++) m_shader->Delete(m_vs[key]);
|
||||||
m_shader->Delete(m_gs);
|
m_shader->Delete(m_gs);
|
||||||
|
@ -296,10 +297,11 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
|
||||||
m_date.dss->SetStencil(GL_ALWAYS, GL_REPLACE);
|
m_date.dss->SetStencil(GL_ALWAYS, GL_REPLACE);
|
||||||
|
|
||||||
m_date.bs = new GSBlendStateOGL();
|
m_date.bs = new GSBlendStateOGL();
|
||||||
#ifndef ENABLE_OGL_STENCIL_DEBUG
|
// FIXME impact image load?
|
||||||
// Only keep stencil data
|
//#ifndef ENABLE_OGL_STENCIL_DEBUG
|
||||||
m_date.bs->SetMask(false, false, false, false);
|
// // Only keep stencil data
|
||||||
#endif
|
// m_date.bs->SetMask(false, false, false, false);
|
||||||
|
//#endif
|
||||||
|
|
||||||
// ****************************************************************
|
// ****************************************************************
|
||||||
// HW renderer shader
|
// HW renderer shader
|
||||||
|
@ -538,9 +540,9 @@ void GSDeviceOGL::InitPrimDateTexture(int w, int h)
|
||||||
{
|
{
|
||||||
// Create a texture to avoid the useless clean@0
|
// Create a texture to avoid the useless clean@0
|
||||||
if (m_date.t == NULL)
|
if (m_date.t == NULL)
|
||||||
m_date.t = CreateTexture(w, h, GL_R32UI);
|
m_date.t = CreateTexture(w, h, GL_R32I);
|
||||||
|
|
||||||
ClearRenderTarget_ui(m_date.t, 0xFFFFFFFF);
|
ClearRenderTarget_ui(m_date.t, 0x0FFFFFFF);
|
||||||
|
|
||||||
#ifdef ENABLE_OGL_STENCIL_DEBUG
|
#ifdef ENABLE_OGL_STENCIL_DEBUG
|
||||||
gl_ActiveTexture(GL_TEXTURE0 + 5);
|
gl_ActiveTexture(GL_TEXTURE0 + 5);
|
||||||
|
@ -557,9 +559,9 @@ void GSDeviceOGL::BindDateTexture()
|
||||||
// TODO: multibind?
|
// TODO: multibind?
|
||||||
// GLuint textures[1] = {static_cast<GSTextureOGL*>(m_date.t)->GetID()};
|
// GLuint textures[1] = {static_cast<GSTextureOGL*>(m_date.t)->GetID()};
|
||||||
// gl_BindImageTextures(0, 1, textures);
|
// gl_BindImageTextures(0, 1, textures);
|
||||||
//gl_BindImageTexture(0, 0, 0, true, 0, GL_READ_WRITE, GL_R32UI);
|
//gl_BindImageTexture(0, 0, 0, true, 0, GL_READ_WRITE, GL_R32I);
|
||||||
|
|
||||||
gl_BindImageTexture(0, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32UI);
|
gl_BindImageTexture(0, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::RecycleDateTexture()
|
void GSDeviceOGL::RecycleDateTexture()
|
||||||
|
|
|
@ -509,6 +509,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
GSDepthStencilOGL* m_om_dss[1<<6];
|
GSDepthStencilOGL* m_om_dss[1<<6];
|
||||||
hash_map<uint32, GLuint > m_ps;
|
hash_map<uint32, GLuint > m_ps;
|
||||||
hash_map<uint32, GSBlendStateOGL* > m_om_bs;
|
hash_map<uint32, GSBlendStateOGL* > m_om_bs;
|
||||||
|
GLuint m_apitrace;
|
||||||
|
|
||||||
GLuint m_palette_ss;
|
GLuint m_palette_ss;
|
||||||
GLuint m_rt_ss;
|
GLuint m_rt_ss;
|
||||||
|
|
|
@ -268,10 +268,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
||||||
|
|
||||||
// TODO
|
// TODO
|
||||||
//if (UserHacks_DateGL4 && DATE && om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
|
if (UserHacks_DateGL4 && DATE && om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
|
||||||
if (UserHacks_DateGL4 && DATE) {
|
|
||||||
//if (!(context->FBA.FBA && context->TEST.DATM == 1))
|
//if (!(context->FBA.FBA && context->TEST.DATM == 1))
|
||||||
advance_DATE = true;
|
|
||||||
|
//advance_DATE = true;
|
||||||
|
advance_DATE = GLLoader::found_GL_ARB_shader_image_load_store;
|
||||||
}
|
}
|
||||||
|
|
||||||
// vs
|
// vs
|
||||||
|
@ -500,8 +501,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
dev->SetupCB(&vs_cb, &ps_cb);
|
dev->SetupCB(&vs_cb, &ps_cb);
|
||||||
|
|
||||||
if (advance_DATE) {
|
if (advance_DATE) {
|
||||||
// Create an r32ui image that will contain primitive ID
|
// Create an r32i image that will contain primitive ID
|
||||||
// Note: do it at the beginning because the clean will dirty the state
|
// Note: do it at the beginning because the clean will dirty the FBO state
|
||||||
//dev->InitPrimDateTexture(rtsize.x, rtsize.y);
|
//dev->InitPrimDateTexture(rtsize.x, rtsize.y);
|
||||||
|
|
||||||
// Don't write anything on the color buffer
|
// Don't write anything on the color buffer
|
||||||
|
|
|
@ -89,6 +89,7 @@ void GSShaderOGL::PS(GLuint s, GLuint sub_count)
|
||||||
|
|
||||||
GLState::ps = s;
|
GLState::ps = s;
|
||||||
GLState::dirty_prog = true;
|
GLState::dirty_prog = true;
|
||||||
|
GLState::dirty_subroutine_ps = true;
|
||||||
#ifndef ENABLE_GLES
|
#ifndef ENABLE_GLES
|
||||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||||
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
|
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
|
||||||
|
@ -278,10 +279,10 @@ GLuint GSShaderOGL::LinkNewProgram()
|
||||||
void GSShaderOGL::UseProgram()
|
void GSShaderOGL::UseProgram()
|
||||||
{
|
{
|
||||||
if (GLState::dirty_prog) {
|
if (GLState::dirty_prog) {
|
||||||
|
if (!GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||||
GLState::dirty_subroutine_ps = true;
|
GLState::dirty_subroutine_ps = true;
|
||||||
GLState::dirty_ressources = true;
|
GLState::dirty_ressources = true;
|
||||||
|
|
||||||
if (!GLLoader::found_GL_ARB_separate_shader_objects) {
|
|
||||||
hash_map<uint64, GLuint >::iterator it;
|
hash_map<uint64, GLuint >::iterator it;
|
||||||
// Note: shader are integer lookup pointer. They start from 1 and incr
|
// Note: shader are integer lookup pointer. They start from 1 and incr
|
||||||
// every time you create a new shader OR a new program.
|
// every time you create a new shader OR a new program.
|
||||||
|
@ -340,19 +341,22 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
|
||||||
}
|
}
|
||||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||||
// Need GL version 410
|
// Need GL version 410
|
||||||
header += "#extension GL_ARB_separate_shader_objects : require\n";
|
header += "#extension GL_ARB_separate_shader_objects: require\n";
|
||||||
} else {
|
} else {
|
||||||
header += "#define DISABLE_SSO\n";
|
header += "#define DISABLE_SSO\n";
|
||||||
}
|
}
|
||||||
if (GLLoader::found_only_gl30) {
|
if (GLLoader::found_only_gl30) {
|
||||||
// Need version 330
|
// Need version 330
|
||||||
header += "#extension GL_ARB_explicit_attrib_location : require\n";
|
header += "#extension GL_ARB_explicit_attrib_location: require\n";
|
||||||
// Need version 140
|
// Need version 140
|
||||||
header += "#extension GL_ARB_uniform_buffer_object : require\n";
|
header += "#extension GL_ARB_uniform_buffer_object: require\n";
|
||||||
}
|
}
|
||||||
if (GLLoader::found_GL_ARB_shader_subroutine) {
|
if (GLLoader::found_GL_ARB_shader_subroutine && GLLoader::found_GL_ARB_explicit_uniform_location) {
|
||||||
// Need GL version 400
|
// Need GL version 400
|
||||||
header += "#define SUBROUTINE_GL40 1\n";
|
header += "#define SUBROUTINE_GL40 1\n";
|
||||||
|
header += "#extension GL_ARB_shader_subroutine: require\n";
|
||||||
|
// Need GL version 430
|
||||||
|
header += "#extension GL_ARB_explicit_uniform_location: require\n";
|
||||||
}
|
}
|
||||||
#ifdef ENABLE_OGL_STENCIL_DEBUG
|
#ifdef ENABLE_OGL_STENCIL_DEBUG
|
||||||
header += "#define ENABLE_OGL_STENCIL_DEBUG 1\n";
|
header += "#define ENABLE_OGL_STENCIL_DEBUG 1\n";
|
||||||
|
@ -413,7 +417,7 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
|
||||||
|
|
||||||
std::string header = GenGlslHeader(entry, type, macro_sel);
|
std::string header = GenGlslHeader(entry, type, macro_sel);
|
||||||
int shader_nb = 1;
|
int shader_nb = 1;
|
||||||
#if 0
|
#if 1
|
||||||
sources[0] = header.c_str();
|
sources[0] = header.c_str();
|
||||||
sources[1] = glsl_h_code;
|
sources[1] = glsl_h_code;
|
||||||
shader_nb++;
|
shader_nb++;
|
||||||
|
|
|
@ -56,6 +56,9 @@ void GSDeviceOGL::CreateTextureFX()
|
||||||
|
|
||||||
for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++)
|
for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++)
|
||||||
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
|
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
|
||||||
|
|
||||||
|
// Help to debug FS in apitrace
|
||||||
|
m_apitrace = CompilePS(PSSelector());
|
||||||
}
|
}
|
||||||
|
|
||||||
GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
|
GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
|
||||||
|
|
|
@ -204,9 +204,9 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read)
|
||||||
|
|
||||||
// Bunch of constant parameter
|
// Bunch of constant parameter
|
||||||
switch (m_format) {
|
switch (m_format) {
|
||||||
case GL_R32UI:
|
case GL_R32I:
|
||||||
m_int_format = GL_RED_INTEGER;
|
m_int_format = GL_RED_INTEGER;
|
||||||
m_int_type = GL_UNSIGNED_INT;
|
m_int_type = GL_INT;
|
||||||
m_int_alignment = 4;
|
m_int_alignment = 4;
|
||||||
m_int_shift = 2;
|
m_int_shift = 2;
|
||||||
break;
|
break;
|
||||||
|
@ -559,7 +559,7 @@ void GSTextureOGL::SaveRaw(const string& fn, const void* image, uint32 pitch)
|
||||||
|
|
||||||
for(int h = m_size.y; h > 0; h--) {
|
for(int h = m_size.y; h > 0; h--) {
|
||||||
for (int w = m_size.x; w > 0; w--, data += 1) {
|
for (int w = m_size.x; w > 0; w--, data += 1) {
|
||||||
if (*data == 0xffffffff)
|
if (*data > 0xffffff)
|
||||||
fprintf(fp, "");
|
fprintf(fp, "");
|
||||||
else {
|
else {
|
||||||
fprintf(fp, "%x", *data);
|
fprintf(fp, "%x", *data);
|
||||||
|
@ -594,11 +594,11 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
|
||||||
glReadPixels(0, 0, m_size.x, m_size.y, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image);
|
glReadPixels(0, 0, m_size.x, m_size.y, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image);
|
||||||
|
|
||||||
gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
|
gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
|
||||||
} else if(m_format == GL_R32UI) {
|
} else if(m_format == GL_R32I) {
|
||||||
gl_ActiveTexture(GL_TEXTURE0 + 6);
|
gl_ActiveTexture(GL_TEXTURE0 + 6);
|
||||||
glBindTexture(GL_TEXTURE_2D, m_texture_id);
|
glBindTexture(GL_TEXTURE_2D, m_texture_id);
|
||||||
|
|
||||||
glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, image);
|
glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_INT, image);
|
||||||
SaveRaw(fn, image, pitch);
|
SaveRaw(fn, image, pitch);
|
||||||
|
|
||||||
// Not supported in Save function
|
// Not supported in Save function
|
||||||
|
|
|
@ -817,7 +817,7 @@ static const char* tfx_glsl =
|
||||||
"#ifndef DISABLE_GL42_image\n"
|
"#ifndef DISABLE_GL42_image\n"
|
||||||
"#if PS_DATE > 0\n"
|
"#if PS_DATE > 0\n"
|
||||||
"// FIXME how to declare memory access\n"
|
"// FIXME how to declare memory access\n"
|
||||||
"layout(r32ui, binding = 0) coherent uniform uimage2D img_prim_min;\n"
|
"layout(r32i, binding = 0) coherent uniform iimage2D img_prim_min;\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"#else\n"
|
"#else\n"
|
||||||
"// use basic stencil\n"
|
"// use basic stencil\n"
|
||||||
|
@ -1316,6 +1316,16 @@ static const char* tfx_glsl =
|
||||||
"#if !GL_ES\n"
|
"#if !GL_ES\n"
|
||||||
"void ps_main()\n"
|
"void ps_main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
|
"#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n"
|
||||||
|
" int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));\n"
|
||||||
|
" // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n"
|
||||||
|
" // the bad alpha value so we must keep it.\n"
|
||||||
|
"\n"
|
||||||
|
" if (gl_PrimitiveID > stencil_ceil) {\n"
|
||||||
|
" discard;\n"
|
||||||
|
" }\n"
|
||||||
|
"#endif\n"
|
||||||
|
"\n"
|
||||||
" vec4 c = ps_color();\n"
|
" vec4 c = ps_color();\n"
|
||||||
"\n"
|
"\n"
|
||||||
" float alpha = c.a * 2.0;\n"
|
" float alpha = c.a * 2.0;\n"
|
||||||
|
@ -1347,33 +1357,11 @@ static const char* tfx_glsl =
|
||||||
" }\n"
|
" }\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"\n"
|
"\n"
|
||||||
" // TODO\n"
|
|
||||||
" // warning non uniform flow ???\n"
|
|
||||||
"#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n"
|
|
||||||
" uint stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));\n"
|
|
||||||
" // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n"
|
|
||||||
" // the bad alpha value so we must keep it.\n"
|
|
||||||
"#if 0\n"
|
|
||||||
" if (stencil_ceil > 0)\n"
|
|
||||||
" c = vec4(1.0, 0.0, 0.0, 1.0);\n"
|
|
||||||
" else\n"
|
|
||||||
" c = vec4(0.0, 1.0, 0.0, 1.0);\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"#if 1\n"
|
|
||||||
" if (gl_PrimitiveID > stencil_ceil) {\n"
|
|
||||||
" discard;\n"
|
|
||||||
" }\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"\n"
|
"\n"
|
||||||
"#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image)\n"
|
"#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image)\n"
|
||||||
" // Don't write anything on the framebuffer\n"
|
" // Don't write anything on the framebuffer\n"
|
||||||
" // Note: you can't use discard because it will also drop\n"
|
" // Note: you can't use discard because it will also drop\n"
|
||||||
" // image operation\n"
|
" // image operation\n"
|
||||||
" // Note2: output will be disabled too in opengl\n"
|
|
||||||
"#else\n"
|
"#else\n"
|
||||||
" SV_Target0 = c;\n"
|
" SV_Target0 = c;\n"
|
||||||
" SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n"
|
" SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n"
|
||||||
|
|
|
@ -307,7 +307,7 @@ layout(binding = 1) uniform sampler2D PaletteSampler;
|
||||||
#ifndef DISABLE_GL42_image
|
#ifndef DISABLE_GL42_image
|
||||||
#if PS_DATE > 0
|
#if PS_DATE > 0
|
||||||
// FIXME how to declare memory access
|
// FIXME how to declare memory access
|
||||||
layout(r32ui, binding = 0) coherent uniform uimage2D img_prim_min;
|
layout(r32i, binding = 0) coherent uniform iimage2D img_prim_min;
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
// use basic stencil
|
// use basic stencil
|
||||||
|
@ -806,6 +806,16 @@ void ps_main()
|
||||||
#if !GL_ES
|
#if !GL_ES
|
||||||
void ps_main()
|
void ps_main()
|
||||||
{
|
{
|
||||||
|
#if PS_DATE == 3 && !defined(DISABLE_GL42_image)
|
||||||
|
int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));
|
||||||
|
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
||||||
|
// the bad alpha value so we must keep it.
|
||||||
|
|
||||||
|
if (gl_PrimitiveID > stencil_ceil) {
|
||||||
|
discard;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
vec4 c = ps_color();
|
vec4 c = ps_color();
|
||||||
|
|
||||||
float alpha = c.a * 2.0;
|
float alpha = c.a * 2.0;
|
||||||
|
@ -837,33 +847,11 @@ void ps_main()
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TODO
|
|
||||||
// warning non uniform flow ???
|
|
||||||
#if PS_DATE == 3 && !defined(DISABLE_GL42_image)
|
|
||||||
uint stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));
|
|
||||||
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
|
|
||||||
// the bad alpha value so we must keep it.
|
|
||||||
#if 0
|
|
||||||
if (stencil_ceil > 0)
|
|
||||||
c = vec4(1.0, 0.0, 0.0, 1.0);
|
|
||||||
else
|
|
||||||
c = vec4(0.0, 1.0, 0.0, 1.0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
if (gl_PrimitiveID > stencil_ceil) {
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image)
|
#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image)
|
||||||
// Don't write anything on the framebuffer
|
// Don't write anything on the framebuffer
|
||||||
// Note: you can't use discard because it will also drop
|
// Note: you can't use discard because it will also drop
|
||||||
// image operation
|
// image operation
|
||||||
// Note2: output will be disabled too in opengl
|
|
||||||
#else
|
#else
|
||||||
SV_Target0 = c;
|
SV_Target0 = c;
|
||||||
SV_Target1 = vec4(alpha, alpha, alpha, alpha);
|
SV_Target1 = vec4(alpha, alpha, alpha, alpha);
|
||||||
|
|
Loading…
Reference in New Issue