mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl: Test the ARB_shader_subroutine GL4.0 extension
The idea was to replace shader program switches with function-pointer calls inside shaders, at least for the parameters that often change between draw calls. So far I have only ported atst and colclip. Unfortunately the code is "slower" (on GSdx standalone), so for the moment keep the code but leave it disabled. If I understand correctly, the validation of the program is done in the "driver thread" but the additional calls are done in the overloaded MTGS thread. Apitrace profiling shows faster GPU draw calls. Another possibility is that the driver still needs to validate the draw call because of other state changes. Here are some stats on colin3 (90 frames): without subroutine: UseProgram 125246; with subroutine: UseProgram 2906, subroutine 125945 => 3605 extra calls of overhead (not all parameters are ported to subroutines). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5715 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c9755361ec
commit
0f603a98d5
|
@ -81,7 +81,9 @@ PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages = NULL;
|
|||
PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer = NULL;
|
||||
PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer = NULL;
|
||||
PFNGLBUFFERSUBDATAPROC gl_BufferSubData = NULL;
|
||||
// GL 4.1
|
||||
// GL4.0
|
||||
PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL;
|
||||
// GL4.1
|
||||
PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL;
|
||||
PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL;
|
||||
PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines = NULL;
|
||||
|
@ -122,9 +124,10 @@ namespace GLLoader {
|
|||
bool found_GL_ARB_clear_texture = false; // Don't know if GL3 hardawe can support it
|
||||
bool found_GL_ARB_buffer_storage = false;
|
||||
// GL4 hardware
|
||||
bool found_GL_ARB_copy_image = false;
|
||||
bool found_GL_ARB_copy_image = false; // Not sure actually
|
||||
bool found_GL_ARB_gpu_shader5 = false;
|
||||
bool found_GL_ARB_shader_image_load_store = false;
|
||||
bool found_GL_ARB_shader_subroutine = false;
|
||||
|
||||
// Mandatory for FULL GL (but optional for GLES)
|
||||
bool found_GL_ARB_multi_bind = false; // Not yet. Wait Mesa & AMD drivers
|
||||
|
@ -221,6 +224,17 @@ namespace GLLoader {
|
|||
if (ext.compare("GL_ARB_copy_image") == 0) found_GL_ARB_copy_image = true;
|
||||
if (ext.compare("GL_ARB_gpu_shader5") == 0) found_GL_ARB_gpu_shader5 = true;
|
||||
if (ext.compare("GL_ARB_shader_image_load_store") == 0) found_GL_ARB_shader_image_load_store = true;
|
||||
#if 0
|
||||
// Strangely, it doesn't provide the expected speed boost.
|
||||
// Note: only atst/colclip were replaced with subroutines for the moment. It replaces 2000 program switches on
|
||||
// Colin McRae 3 by 2100 uniform updates, but the code is slower!
|
||||
//
|
||||
// Current hypothesis: the validation of useprogram is done in the "driver thread" whereas the extra function calls
|
||||
// are done on the overloaded main threads.
|
||||
// Apitrace profiling shows faster GPU draw times
|
||||
|
||||
if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true;
|
||||
#endif
|
||||
#ifdef GL44 // Need to debug the code first
|
||||
if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true;
|
||||
if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true;
|
||||
|
@ -242,6 +256,7 @@ namespace GLLoader {
|
|||
status &= status_and_override(found_GL_ARB_shader_image_load_store,"GL_ARB_shader_image_load_store");
|
||||
status &= status_and_override(found_GL_ARB_clear_texture,"GL_ARB_clear_texture");
|
||||
status &= status_and_override(found_GL_ARB_buffer_storage,"GL_ARB_buffer_storage");
|
||||
status &= status_and_override(found_GL_ARB_shader_subroutine,"GL_ARB_shader_subroutine");
|
||||
|
||||
status &= status_and_override(found_GL_ARB_texture_storage, "GL_ARB_texture_storage", true);
|
||||
status &= status_and_override(found_GL_ARB_shading_language_420pack,"GL_ARB_shading_language_420pack");
|
||||
|
|
|
@ -134,6 +134,8 @@ extern PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages;
|
|||
extern PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer;
|
||||
extern PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer;
|
||||
extern PFNGLBUFFERSUBDATAPROC gl_BufferSubData;
|
||||
// GL4.0
|
||||
extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv;
|
||||
// GL4.1
|
||||
extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline;
|
||||
extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines;
|
||||
|
@ -254,4 +256,5 @@ namespace GLLoader {
|
|||
extern bool found_GL_ARB_clear_texture;
|
||||
extern bool found_GL_ARB_multi_bind;
|
||||
extern bool found_GL_ARB_buffer_storage;
|
||||
extern bool found_GL_ARB_shader_subroutine;
|
||||
}
|
||||
|
|
|
@ -62,6 +62,8 @@ namespace GLState {
|
|||
GLuint gs = 0;
|
||||
GLuint vs = 0;
|
||||
GLuint program = 0;
|
||||
bool dirty_prog = false;
|
||||
bool dirty_subroutine_ps = false;
|
||||
#if 0
|
||||
struct {
|
||||
GSVertexBufferStateOGL* vb;
|
||||
|
@ -112,5 +114,7 @@ namespace GLState {
|
|||
gs = 0;
|
||||
vs = 0;
|
||||
program = 0;
|
||||
dirty_prog = false;
|
||||
dirty_subroutine_ps = false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,8 @@ namespace GLState {
|
|||
extern GLuint gs;
|
||||
extern GLuint vs;
|
||||
extern GLuint program; // monolith program (when sso isn't supported)
|
||||
extern bool dirty_prog;
|
||||
extern bool dirty_subroutine_ps;
|
||||
|
||||
extern void Clear();
|
||||
}
|
||||
|
|
|
@ -168,7 +168,7 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
|
|||
// ****************************************************************
|
||||
// Various object
|
||||
// ****************************************************************
|
||||
m_shader = new GSShaderOGL(!!theApp.GetConfig("debug_ogl_shader", 1), GLLoader::found_GL_ARB_separate_shader_objects, GLLoader::found_GL_ARB_shading_language_420pack);
|
||||
m_shader = new GSShaderOGL(!!theApp.GetConfig("debug_ogl_shader", 1));
|
||||
|
||||
gl_GenFramebuffers(1, &m_fbo);
|
||||
gl_GenFramebuffers(1, &m_fbo_read);
|
||||
|
@ -572,6 +572,7 @@ void GSDeviceOGL::Barrier(GLbitfield b)
|
|||
//#endif
|
||||
}
|
||||
|
||||
/* Note: must be here because tfx_glsl is static */
|
||||
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
|
||||
{
|
||||
std::string macro = format("#define VS_BPPZ %d\n", sel.bppz)
|
||||
|
@ -582,6 +583,7 @@ GLuint GSDeviceOGL::CompileVS(VSSelector sel)
|
|||
return m_shader->Compile("tfx.glsl", "vs_main", GL_VERTEX_SHADER, tfx_glsl, macro);
|
||||
}
|
||||
|
||||
/* Note: must be here because tfx_glsl is static */
|
||||
GLuint GSDeviceOGL::CompileGS(GSSelector sel)
|
||||
{
|
||||
// Easy case
|
||||
|
@ -598,6 +600,7 @@ GLuint GSDeviceOGL::CompileGS(GSSelector sel)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Note: must be here because tfx_glsl is static */
|
||||
GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||
{
|
||||
std::string macro = format("#define PS_FST %d\n", sel.fst)
|
||||
|
@ -720,6 +723,14 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
|
|||
|
||||
GSVector2i ds = dt->GetSize();
|
||||
|
||||
// WARNING: setup of the program must be done first. So you can setup
|
||||
// 1/ subroutine uniform
|
||||
// 2/ bindless texture uniform
|
||||
// 3/ others uniform?
|
||||
m_shader->VS(m_convert.vs);
|
||||
m_shader->GS(0);
|
||||
m_shader->PS(ps);
|
||||
|
||||
// ************************************
|
||||
// om
|
||||
// ************************************
|
||||
|
@ -764,32 +775,17 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
|
|||
{GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(flip_sr.x, flip_sr.w)},
|
||||
{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(flip_sr.z, flip_sr.w)},
|
||||
};
|
||||
//fprintf(stderr, "A:%fx%f B:%fx%f\n", left, top, bottom, right);
|
||||
//fprintf(stderr, "SR: %f %f %f %f\n", sr.x, sr.y, sr.z, sr.w);
|
||||
|
||||
IASetVertexState(m_vb_sr);
|
||||
IASetVertexBuffer(vertices, 4);
|
||||
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
|
||||
|
||||
// ************************************
|
||||
// vs
|
||||
// Texture
|
||||
// ************************************
|
||||
|
||||
m_shader->VS(m_convert.vs);
|
||||
|
||||
// ************************************
|
||||
// gs
|
||||
// ************************************
|
||||
|
||||
m_shader->GS(0);
|
||||
|
||||
// ************************************
|
||||
// ps
|
||||
// ************************************
|
||||
|
||||
PSSetShaderResource(0, static_cast<GSTextureOGL*>(st)->GetID());
|
||||
PSSetShaderResource(static_cast<GSTextureOGL*>(st)->GetID());
|
||||
PSSetSamplerState(linear ? m_convert.ln : m_convert.pt);
|
||||
m_shader->PS(ps);
|
||||
|
||||
// ************************************
|
||||
// Draw
|
||||
|
@ -886,6 +882,14 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
|
|||
|
||||
ClearStencil(ds, 0);
|
||||
|
||||
// WARNING: setup of the program must be done first. So you can setup
|
||||
// 1/ subroutine uniform
|
||||
// 2/ bindless texture uniform
|
||||
// 3/ others uniform?
|
||||
m_shader->VS(m_convert.vs);
|
||||
m_shader->GS(0);
|
||||
m_shader->PS(m_convert.ps[datm ? 2 : 3]);
|
||||
|
||||
// om
|
||||
|
||||
OMSetDepthStencilState(m_date.dss, 1);
|
||||
|
@ -898,19 +902,11 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
|
|||
IASetVertexBuffer(vertices, 4);
|
||||
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
|
||||
|
||||
// vs
|
||||
|
||||
m_shader->VS(m_convert.vs);
|
||||
// Texture
|
||||
|
||||
// gs
|
||||
|
||||
m_shader->GS(0);
|
||||
|
||||
// ps
|
||||
|
||||
PSSetShaderResource(0, static_cast<GSTextureOGL*>(rt)->GetID());
|
||||
PSSetShaderResource(static_cast<GSTextureOGL*>(rt)->GetID());
|
||||
PSSetSamplerState(m_convert.pt);
|
||||
m_shader->PS(m_convert.ps[datm ? 2 : 3]);
|
||||
|
||||
//
|
||||
|
||||
|
@ -966,16 +962,16 @@ void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology)
|
|||
m_state.vb->SetTopology(topology);
|
||||
}
|
||||
|
||||
void GSDeviceOGL::PSSetShaderResource(const int i, GLuint sr)
|
||||
void GSDeviceOGL::PSSetShaderResource(GLuint sr)
|
||||
{
|
||||
if (GLState::tex_unit[i] != sr) {
|
||||
GLState::tex_unit[i] = sr;
|
||||
if (GLState::tex_unit[0] != sr) {
|
||||
GLState::tex_unit[0] = sr;
|
||||
|
||||
if (GLLoader::found_GL_ARB_multi_bind) {
|
||||
GLuint textures[1] = {sr};
|
||||
gl_BindTextures(i, 1, textures);
|
||||
gl_BindTextures(0, 1, textures);
|
||||
} else {
|
||||
gl_ActiveTexture(GL_TEXTURE0 + i);
|
||||
gl_ActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, sr);
|
||||
|
||||
// Get back to the expected active texture unit
|
||||
|
@ -987,8 +983,21 @@ void GSDeviceOGL::PSSetShaderResource(const int i, GLuint sr)
|
|||
void GSDeviceOGL::PSSetShaderResources(GLuint tex[2])
|
||||
{
|
||||
if (GLState::tex_unit[0] != tex[0] || GLState::tex_unit[1] != tex[1]) {
|
||||
GLuint textures[2] = {tex[0], tex[1]};
|
||||
gl_BindTextures(0, 2, textures);
|
||||
GLState::tex_unit[0] = tex[0];
|
||||
GLState::tex_unit[1] = tex[1];
|
||||
|
||||
if (GLLoader::found_GL_ARB_multi_bind) {
|
||||
gl_BindTextures(0, 2, tex);
|
||||
} else {
|
||||
gl_ActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, tex[0]);
|
||||
|
||||
gl_ActiveTexture(GL_TEXTURE0 + 1);
|
||||
glBindTexture(GL_TEXTURE_2D, tex[1]);
|
||||
|
||||
// Get back to the expected active texture unit
|
||||
gl_ActiveTexture(GL_TEXTURE0 + 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -606,10 +606,9 @@ class GSDeviceOGL : public GSDevice
|
|||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
void IASetVertexState(GSVertexBufferStateOGL* vb = NULL);
|
||||
|
||||
void PSSetShaderResource(const int i, GLuint sr);
|
||||
void PSSetShaderResource(GLuint sr);
|
||||
void PSSetShaderResources(GLuint tex[2]);
|
||||
void PSSetSamplerState(GLuint ss);
|
||||
void PSSetSamplerStates(const int count, const GLuint* samplers);
|
||||
|
||||
void OMSetDepthStencilState(GSDepthStencilOGL* dss, uint8 sref);
|
||||
void OMSetBlendState(GSBlendStateOGL* bs, float bf);
|
||||
|
@ -627,9 +626,10 @@ class GSDeviceOGL : public GSDevice
|
|||
|
||||
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupVS(VSSelector sel);
|
||||
void SetupGS(GSSelector sel);
|
||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb);
|
||||
void SetupPS(PSSelector sel);
|
||||
void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb);
|
||||
void SetupSampler(PSSelector sel, PSSamplerSelector ssel);
|
||||
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
|
||||
|
||||
|
|
|
@ -163,6 +163,7 @@ void GSRendererOGL::SetupIA()
|
|||
dev->IASetVertexState();
|
||||
|
||||
if(UserHacks_WildHack && !isPackedUV_HackFlag) {
|
||||
// FIXME: why not put it on the Vertex shader
|
||||
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
|
||||
{
|
||||
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
|
||||
|
@ -441,6 +442,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
om_dssel.alpha_stencil = 1;
|
||||
}
|
||||
|
||||
// By default don't use texture
|
||||
ps_sel.tfx = 4;
|
||||
|
||||
if(tex)
|
||||
{
|
||||
const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[context->TEX0.PSM];
|
||||
|
@ -456,6 +460,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
ps_sel.tcc = context->TEX0.TCC;
|
||||
ps_sel.ltf = bilinear && !simple_sample;
|
||||
ps_sel.spritehack = tex->m_spritehack_t;
|
||||
// FIXME the ati is currently disabled on the shader. I need to find a .gs to test that we got same
|
||||
// bug on opengl
|
||||
ps_sel.point_sampler = !(bilinear && simple_sample);
|
||||
|
||||
int w = tex->m_texture->GetWidth();
|
||||
|
@ -491,23 +497,30 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
|
||||
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
|
||||
ps_ssel.ltf = bilinear && simple_sample;
|
||||
|
||||
dev->SetupSampler(ps_sel, ps_ssel);
|
||||
if (tex->m_palette) {
|
||||
if (GLLoader::found_GL_ARB_multi_bind) {
|
||||
GLuint textures[2] = {static_cast<GSTextureOGL*>(tex->m_texture)->GetID(), static_cast<GSTextureOGL*>(tex->m_palette)->GetID()};
|
||||
dev->PSSetShaderResources(textures);
|
||||
} else {
|
||||
dev->PSSetShaderResource(1, static_cast<GSTextureOGL*>(tex->m_palette)->GetID());
|
||||
dev->PSSetShaderResource(0, static_cast<GSTextureOGL*>(tex->m_texture)->GetID());
|
||||
}
|
||||
} else {
|
||||
dev->PSSetShaderResource(0, static_cast<GSTextureOGL*>(tex->m_texture)->GetID());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_sel.tfx = 4;
|
||||
|
||||
// WARNING: setup of the program must be done first. So you can setup
|
||||
// 1/ subroutine uniform
|
||||
// 2/ bindless texture uniform
|
||||
// 3/ others uniform?
|
||||
dev->SetupVS(vs_sel);
|
||||
dev->SetupGS(gs_sel);
|
||||
dev->SetupPS(ps_sel);
|
||||
|
||||
// Note: bindless texture will use uniform so it must be done after the program setup
|
||||
if(tex) {
|
||||
if (tex->m_palette) {
|
||||
// 2 textures (main + palette)
|
||||
dev->SetupSampler(ps_sel, ps_ssel);
|
||||
|
||||
GLuint textures[2] = {static_cast<GSTextureOGL*>(tex->m_texture)->GetID(), static_cast<GSTextureOGL*>(tex->m_palette)->GetID()};
|
||||
dev->PSSetShaderResources(textures);
|
||||
} else if (tex->m_texture) {
|
||||
// Only main texture
|
||||
dev->SetupSampler(ps_sel, ps_ssel);
|
||||
|
||||
dev->PSSetShaderResource(static_cast<GSTextureOGL*>(tex->m_texture)->GetID());
|
||||
}
|
||||
}
|
||||
|
||||
// rs
|
||||
|
@ -521,9 +534,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
SetupIA();
|
||||
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
dev->SetupVS(vs_sel, &vs_cb);
|
||||
dev->SetupGS(gs_sel);
|
||||
dev->SetupPS(ps_sel, &ps_cb);
|
||||
dev->SetupCB(&vs_cb, &ps_cb);
|
||||
|
||||
if (advance_DATE) {
|
||||
// Create an r32ui image that will contain primitive ID
|
||||
|
@ -539,7 +550,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
dev->OMSetWriteBuffer();
|
||||
|
||||
ps_sel.date = 3;
|
||||
dev->SetupPS(ps_sel, &ps_cb);
|
||||
dev->SetupPS(ps_sel);
|
||||
|
||||
// Be sure that first pass is finished !
|
||||
dev->Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
|
||||
|
@ -558,7 +569,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
ps_selneg.colclip = 2;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bselneg, afix);
|
||||
dev->SetupPS(ps_selneg, &ps_cb);
|
||||
dev->SetupPS(ps_selneg);
|
||||
|
||||
dev->DrawIndexedPrimitive();
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
|
@ -573,7 +584,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
ps_sel.atst = iatst[ps_sel.atst];
|
||||
|
||||
dev->SetupPS(ps_sel, &ps_cb);
|
||||
dev->SetupPS(ps_sel);
|
||||
|
||||
bool z = om_dssel.zwe;
|
||||
bool r = om_bsel.wr;
|
||||
|
@ -583,11 +594,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
switch(context->TEST.AFAIL)
|
||||
{
|
||||
case AFAIL_KEEP: z = r = g = b = a = false; break; // none
|
||||
case AFAIL_FB_ONLY: z = false; break; // rgba
|
||||
case AFAIL_ZB_ONLY: r = g = b = a = false; break; // z
|
||||
case AFAIL_RGB_ONLY: z = a = false; break; // rgb
|
||||
default: __assume(0);
|
||||
case AFAIL_KEEP: z = r = g = b = a = false; break; // none
|
||||
case AFAIL_FB_ONLY: z = false; break; // rgba
|
||||
case AFAIL_ZB_ONLY: r = g = b = a = false; break; // z
|
||||
case AFAIL_RGB_ONLY: z = a = false; break; // rgb
|
||||
default: __assume(0);
|
||||
}
|
||||
|
||||
if(z || r || g || b || a)
|
||||
|
@ -611,7 +622,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
ps_selneg.colclip = 2;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bselneg, afix);
|
||||
dev->SetupPS(ps_selneg, &ps_cb);
|
||||
dev->SetupPS(ps_selneg);
|
||||
|
||||
dev->DrawIndexedPrimitive();
|
||||
}
|
||||
|
|
|
@ -23,14 +23,16 @@
|
|||
#include "GSShaderOGL.h"
|
||||
#include "GLState.h"
|
||||
|
||||
GSShaderOGL::GSShaderOGL(bool debug, bool sso, bool glsl420) :
|
||||
GSShaderOGL::GSShaderOGL(bool debug) :
|
||||
m_debug_shader(debug),
|
||||
m_sso(sso),
|
||||
m_glsl420(glsl420)
|
||||
m_sub_count(0)
|
||||
{
|
||||
|
||||
memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(GLuint));
|
||||
|
||||
m_single_prog.clear();
|
||||
#ifndef ENABLE_GLES
|
||||
if (sso) {
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
gl_GenProgramPipelines(1, &m_pipeline);
|
||||
gl_BindProgramPipeline(m_pipeline);
|
||||
}
|
||||
|
@ -40,7 +42,7 @@ GSShaderOGL::GSShaderOGL(bool debug, bool sso, bool glsl420) :
|
|||
GSShaderOGL::~GSShaderOGL()
|
||||
{
|
||||
#ifndef ENABLE_GLES
|
||||
if (m_sso)
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects)
|
||||
gl_DeleteProgramPipelines(1, &m_pipeline);
|
||||
#endif
|
||||
|
||||
|
@ -53,21 +55,35 @@ void GSShaderOGL::VS(GLuint s)
|
|||
if (GLState::vs != s)
|
||||
{
|
||||
GLState::vs = s;
|
||||
GLState::dirty_prog = true;
|
||||
#ifndef ENABLE_GLES
|
||||
if (m_sso)
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects)
|
||||
gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void GSShaderOGL::PS(GLuint s)
|
||||
void GSShaderOGL::PS_subroutine(GLuint *sub)
|
||||
{
|
||||
if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1])) {
|
||||
m_ps_sub[0] = sub[0];
|
||||
m_ps_sub[1] = sub[1];
|
||||
GLState::dirty_subroutine_ps = true;
|
||||
}
|
||||
}
|
||||
|
||||
void GSShaderOGL::PS(GLuint s, GLuint sub_count)
|
||||
{
|
||||
if (GLState::ps != s)
|
||||
{
|
||||
m_sub_count = sub_count;
|
||||
|
||||
GLState::ps = s;
|
||||
GLState::dirty_prog = true;
|
||||
#ifndef ENABLE_GLES
|
||||
if (m_sso)
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -77,8 +93,9 @@ void GSShaderOGL::GS(GLuint s)
|
|||
if (GLState::gs != s)
|
||||
{
|
||||
GLState::gs = s;
|
||||
GLState::dirty_prog = true;
|
||||
#ifndef ENABLE_GLES
|
||||
if (m_sso)
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects)
|
||||
gl_UseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, s);
|
||||
#endif
|
||||
}
|
||||
|
@ -97,7 +114,7 @@ void GSShaderOGL::SetSamplerBinding(GLuint prog, GLchar* name, GLuint binding)
|
|||
{
|
||||
GLint loc = gl_GetUniformLocation(prog, name);
|
||||
if (loc != -1) {
|
||||
if (m_sso) {
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
#ifndef ENABLE_GLES
|
||||
gl_ProgramUniform1i(prog, loc, binding);
|
||||
#endif
|
||||
|
@ -109,9 +126,9 @@ void GSShaderOGL::SetSamplerBinding(GLuint prog, GLchar* name, GLuint binding)
|
|||
|
||||
void GSShaderOGL::SetupUniform()
|
||||
{
|
||||
if (m_glsl420) return;
|
||||
if (GLLoader::found_GL_ARB_shading_language_420pack) return;
|
||||
|
||||
if (m_sso) {
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
SetUniformBinding(GLState::vs, "cb20", 20);
|
||||
SetUniformBinding(GLState::ps, "cb21", 21);
|
||||
|
||||
|
@ -138,6 +155,17 @@ void GSShaderOGL::SetupUniform()
|
|||
}
|
||||
}
|
||||
|
||||
void GSShaderOGL::SetSubroutineUniform()
|
||||
{
|
||||
if (!GLLoader::found_GL_ARB_shader_subroutine) return;
|
||||
if (m_sub_count == 0) return;
|
||||
|
||||
if (GLState::dirty_subroutine_ps || GLState::dirty_prog)
|
||||
gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_sub_count, m_ps_sub);
|
||||
|
||||
GLState::dirty_subroutine_ps = false;
|
||||
}
|
||||
|
||||
bool GSShaderOGL::ValidateShader(GLuint s)
|
||||
{
|
||||
if (!m_debug_shader) return true;
|
||||
|
@ -223,37 +251,44 @@ GLuint GSShaderOGL::LinkNewProgram()
|
|||
|
||||
void GSShaderOGL::UseProgram()
|
||||
{
|
||||
hash_map<uint64, GLuint >::iterator it;
|
||||
if (!m_sso) {
|
||||
// Note: shader IDs are integer lookup pointers. They start from 1 and increment
|
||||
// every time you create a new shader OR a new program.
|
||||
// Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128.
|
||||
// We might be able to pack the value in a 32-bit int
|
||||
// I would need to check the behavior on Nvidia (pause/resume).
|
||||
uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps;
|
||||
it = m_single_prog.find(sel);
|
||||
if (it == m_single_prog.end()) {
|
||||
GLState::program = LinkNewProgram();
|
||||
m_single_prog[sel] = GLState::program;
|
||||
if (GLState::dirty_prog) {
|
||||
if (!GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
hash_map<uint64, GLuint >::iterator it;
|
||||
// Note: shader IDs are integer lookup pointers. They start from 1 and increment
|
||||
// every time you create a new shader OR a new program.
|
||||
// Note2: vs & gs are precompiled at startup. FGLRX and radeon got value < 128.
|
||||
// We might be able to pack the value in a 32-bit int
|
||||
// I would need to check the behavior on Nvidia (pause/resume).
|
||||
uint64 sel = (uint64)GLState::vs << 40 | (uint64)GLState::gs << 20 | GLState::ps;
|
||||
it = m_single_prog.find(sel);
|
||||
if (it == m_single_prog.end()) {
|
||||
GLState::program = LinkNewProgram();
|
||||
m_single_prog[sel] = GLState::program;
|
||||
|
||||
ValidateProgram(GLState::program);
|
||||
ValidateProgram(GLState::program);
|
||||
|
||||
gl_UseProgram(GLState::program);
|
||||
// warning it must be done after the "setup" of the program
|
||||
SetupUniform();
|
||||
} else {
|
||||
GLuint prog = it->second;
|
||||
if (prog != GLState::program) {
|
||||
GLState::program = prog;
|
||||
gl_UseProgram(GLState::program);
|
||||
|
||||
// warning it must be done after the "setup" of the program
|
||||
SetupUniform();
|
||||
} else {
|
||||
GLuint prog = it->second;
|
||||
if (prog != GLState::program) {
|
||||
GLState::program = prog;
|
||||
gl_UseProgram(GLState::program);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
ValidatePipeline(m_pipeline);
|
||||
|
||||
SetupUniform();
|
||||
}
|
||||
|
||||
} else {
|
||||
ValidatePipeline(m_pipeline);
|
||||
|
||||
SetupUniform();
|
||||
}
|
||||
|
||||
SetSubroutineUniform();
|
||||
|
||||
GLState::dirty_prog = false;
|
||||
}
|
||||
|
||||
std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro)
|
||||
|
@ -266,13 +301,13 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
|
|||
} else {
|
||||
header = "#version 330 core\n";
|
||||
}
|
||||
if (m_glsl420) {
|
||||
if (GLLoader::found_GL_ARB_shading_language_420pack) {
|
||||
// Need GL version 420
|
||||
header += "#extension GL_ARB_shading_language_420pack: require\n";
|
||||
} else {
|
||||
header += "#define DISABLE_GL42\n";
|
||||
}
|
||||
if (m_sso) {
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
// Need GL version 410
|
||||
header += "#extension GL_ARB_separate_shader_objects : require\n";
|
||||
} else {
|
||||
|
@ -284,6 +319,10 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
|
|||
// Need version 140
|
||||
header += "#extension GL_ARB_uniform_buffer_object : require\n";
|
||||
}
|
||||
if (GLLoader::found_GL_ARB_shader_subroutine) {
|
||||
// Need GL version 400
|
||||
header += "#define SUBROUTINE_GL40 1\n";
|
||||
}
|
||||
#ifdef ENABLE_OGL_STENCIL_DEBUG
|
||||
header += "#define ENABLE_OGL_STENCIL_DEBUG 1\n";
|
||||
#endif
|
||||
|
@ -351,7 +390,7 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
|
|||
sources[0] = header.append(glsl_h_code).c_str();
|
||||
#endif
|
||||
|
||||
if (m_sso) {
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
#ifndef ENABLE_GLES
|
||||
program = gl_CreateShaderProgramv(type, shader_nb, sources);
|
||||
#endif
|
||||
|
@ -362,7 +401,7 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
|
|||
}
|
||||
|
||||
bool status;
|
||||
if (m_sso)
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects)
|
||||
status = ValidateProgram(program);
|
||||
else
|
||||
status = ValidateShader(program);
|
||||
|
@ -378,7 +417,7 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent
|
|||
|
||||
void GSShaderOGL::Delete(GLuint s)
|
||||
{
|
||||
if (m_sso) {
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
gl_DeleteProgram(s);
|
||||
} else {
|
||||
gl_DeleteShader(s);
|
||||
|
|
|
@ -25,10 +25,11 @@ class GSShaderOGL {
|
|||
GLuint m_pipeline;
|
||||
hash_map<uint64, GLuint > m_single_prog;
|
||||
const bool m_debug_shader;
|
||||
const bool m_sso;
|
||||
const bool m_glsl420;
|
||||
GLuint m_sub_count;
|
||||
|
||||
GLuint m_ps_sub[2];
|
||||
|
||||
void SetSubroutineUniform();
|
||||
void SetupUniform();
|
||||
void SetUniformBinding(GLuint prog, GLchar* name, GLuint binding);
|
||||
void SetSamplerBinding(GLuint prog, GLchar* name, GLuint binding);
|
||||
|
@ -41,11 +42,12 @@ class GSShaderOGL {
|
|||
GLuint LinkNewProgram();
|
||||
|
||||
public:
|
||||
GSShaderOGL(bool debug, bool sso, bool glsl420);
|
||||
GSShaderOGL(bool debug);
|
||||
~GSShaderOGL();
|
||||
|
||||
void GS(GLuint s);
|
||||
void PS(GLuint s);
|
||||
void PS(GLuint s, GLuint sub_count = 0);
|
||||
void PS_subroutine(GLuint *sub);
|
||||
void VS(GLuint s);
|
||||
|
||||
void UseProgram();
|
||||
|
|
|
@ -133,14 +133,22 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
|
|||
return bs;
|
||||
}
|
||||
|
||||
void GSDeviceOGL::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||
void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb)
|
||||
{
|
||||
if(m_vs_cb_cache.Update(vs_cb)) {
|
||||
m_vs_cb->upload(vs_cb);
|
||||
}
|
||||
|
||||
if(m_ps_cb_cache.Update(ps_cb)) {
|
||||
m_ps_cb->upload(ps_cb);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void GSDeviceOGL::SetupVS(VSSelector sel)
|
||||
{
|
||||
GLuint vs = m_vs[sel];
|
||||
|
||||
if(m_vs_cb_cache.Update(cb)) {
|
||||
m_vs_cb->upload(cb);
|
||||
}
|
||||
|
||||
m_shader->VS(vs);
|
||||
}
|
||||
|
||||
|
@ -151,8 +159,16 @@ void GSDeviceOGL::SetupGS(GSSelector sel)
|
|||
m_shader->GS(gs);
|
||||
}
|
||||
|
||||
void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb)
|
||||
void GSDeviceOGL::SetupPS(PSSelector sel)
|
||||
{
|
||||
if (GLLoader::found_GL_ARB_shader_subroutine) {
|
||||
GLuint sub[2] = {sel.atst, (uint32)sel.colclip + 8};
|
||||
m_shader->PS_subroutine(sub);
|
||||
// Handled by the subroutines, so these selector fields are useless now
|
||||
sel.atst = 0;
|
||||
sel.colclip = 0;
|
||||
}
|
||||
|
||||
// *************************************************************
|
||||
// Static
|
||||
// *************************************************************
|
||||
|
@ -169,11 +185,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb)
|
|||
// *************************************************************
|
||||
// Dynamic
|
||||
// *************************************************************
|
||||
if(m_ps_cb_cache.Update(cb)) {
|
||||
m_ps_cb->upload(cb);
|
||||
}
|
||||
|
||||
m_shader->PS(ps);
|
||||
m_shader->PS(ps, 2);
|
||||
}
|
||||
|
||||
void GSDeviceOGL::SetupSampler(PSSelector sel, PSSamplerSelector ssel)
|
||||
|
|
|
@ -177,7 +177,6 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read)
|
|||
break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
GSTextureOGL::~GSTextureOGL()
|
||||
|
@ -210,7 +209,6 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
|
|||
|
||||
EnableUnit();
|
||||
|
||||
#if 1
|
||||
PboPool::BindPbo();
|
||||
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
||||
|
@ -230,47 +228,19 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
|
|||
PboPool::UnbindPbo();
|
||||
|
||||
return true;
|
||||
#else
|
||||
|
||||
#if 0
|
||||
|
||||
// pitch is in bytes whereas GL_UNPACK_ROW_LENGTH is in pixels
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);
|
||||
|
||||
#ifdef _LINUX
|
||||
if (GLLoader::fglrx_buggy_driver && !GLLoader::in_replayer) {
|
||||
// FIXME: it crashes on Colin McRae Rally 3 (and other games too) when the texture is small
|
||||
//if ((pitch >> 2) == 32 || r.width() < 32 || r.height() < 32) {
|
||||
if ((r.width() < 32) || (pitch == 128 && r.width() == 32)) {
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
fprintf(stderr, "Skip Texture %dx%d with a pitch of %d pixel. Type %x\n", m_size.x, m_size.y, pitch >>2, m_format);
|
||||
fprintf(stderr, "Box (%d,%d)x(%d,%d)\n", r.x, r.y, r.width(), r.height());
|
||||
#endif
|
||||
|
||||
// FIXME useful?
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);
|
||||
|
||||
// FIXME useful?
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // Restore default behavior
|
||||
|
||||
return true;
|
||||
#if 0
|
||||
if(m_dev && m_texture)
|
||||
{
|
||||
D3D11_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1};
|
||||
|
||||
m_ctx->UpdateSubresource(m_texture, 0, &box, data, pitch, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -82,6 +82,8 @@ void GSWndGL::PopulateGlFunction()
|
|||
*(void**)&(gl_VertexAttribIPointer) = GetProcAddress("glVertexAttribIPointer");
|
||||
*(void**)&(gl_VertexAttribPointer) = GetProcAddress("glVertexAttribPointer");
|
||||
*(void**)&(gl_BufferSubData) = GetProcAddress("glBufferSubData");
|
||||
// GL4.0
|
||||
*(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv");
|
||||
// GL4.1
|
||||
*(void**)&(gl_BindProgramPipeline) = GetProcAddress("glBindProgramPipeline");
|
||||
*(void**)&(gl_DeleteProgramPipelines) = GetProcAddress("glDeleteProgramPipelines");
|
||||
|
|
|
@ -1166,6 +1166,77 @@ static const char* tfx_glsl =
|
|||
"#endif\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// Note layout stuff might require gl4.3\n"
|
||||
"#ifdef SUBROUTINE_GL40\n"
|
||||
"// Function pointer type\n"
|
||||
"subroutine void AlphaTestType(vec4 c);\n"
|
||||
"\n"
|
||||
"// a function pointer variable\n"
|
||||
"layout(location = 0) subroutine uniform AlphaTestType atst;\n"
|
||||
"\n"
|
||||
"// The function attached to AlphaTestType\n"
|
||||
"layout(index = 0) subroutine(AlphaTestType)\n"
|
||||
"void atest_never(vec4 c)\n"
|
||||
"{\n"
|
||||
" discard;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 1) subroutine(AlphaTestType)\n"
|
||||
"void atest_always(vec4 c)\n"
|
||||
"{\n"
|
||||
" // Nothing to do\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 2) subroutine(AlphaTestType)\n"
|
||||
"void atest_l(vec4 c)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
" if (PS_SPRITEHACK == 0)\n"
|
||||
" if ((AREF - a - 0.5f) < 0.0f)\n"
|
||||
" discard;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 3) subroutine(AlphaTestType)\n"
|
||||
"void atest_le(vec4 c)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
" if ((AREF - a + 0.5f) < 0.0f)\n"
|
||||
" discard;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 4) subroutine(AlphaTestType)\n"
|
||||
"void atest_e(vec4 c)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
" if ((0.5f - abs(a - AREF)) < 0.0f)\n"
|
||||
" discard;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 5) subroutine(AlphaTestType)\n"
|
||||
"void atest_ge(vec4 c)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
" if ((a-AREF + 0.5f) < 0.0f)\n"
|
||||
" discard;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 6) subroutine(AlphaTestType)\n"
|
||||
"void atest_g(vec4 c)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
" if ((a-AREF - 0.5f) < 0.0f)\n"
|
||||
" discard;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 7) subroutine(AlphaTestType)\n"
|
||||
"void atest_ne(vec4 c)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
" if ((abs(a - AREF) - 0.5f) < 0.0f)\n"
|
||||
" discard;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"#else\n"
|
||||
"void atst(vec4 c)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
|
@ -1210,16 +1281,64 @@ static const char* tfx_glsl =
|
|||
" discard;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"vec4 fog(vec4 c, float f)\n"
|
||||
"// Note layout stuff might require gl4.3\n"
|
||||
"#ifdef SUBROUTINE_GL40\n"
|
||||
"// Function pointer type\n"
|
||||
"subroutine void ColClipType(inout vec4 c);\n"
|
||||
"\n"
|
||||
"// a function pointer variable\n"
|
||||
"layout(location = 1) subroutine uniform ColClipType colclip;\n"
|
||||
"\n"
|
||||
"layout(index = 8) subroutine(ColClipType)\n"
|
||||
"void colclip_0(inout vec4 c)\n"
|
||||
"{\n"
|
||||
" // nothing to do\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 9) subroutine(ColClipType)\n"
|
||||
"void colclip_1(inout vec4 c)\n"
|
||||
"{\n"
|
||||
" // FIXME !!!!\n"
|
||||
" //c.rgb *= c.rgb < 128./255;\n"
|
||||
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
|
||||
" c.rgb *= vec3(factor);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"layout(index = 10) subroutine(ColClipType)\n"
|
||||
"void colclip_2(inout vec4 c)\n"
|
||||
"{\n"
|
||||
" c.rgb = 256.0f/255.0f - c.rgb;\n"
|
||||
" // FIXME !!!!\n"
|
||||
" //c.rgb *= c.rgb < 128./255;\n"
|
||||
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
|
||||
" c.rgb *= vec3(factor);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"#else\n"
|
||||
"void colclip(inout vec4 c)\n"
|
||||
"{\n"
|
||||
" if (PS_COLCLIP == 2)\n"
|
||||
" {\n"
|
||||
" c.rgb = 256.0f/255.0f - c.rgb;\n"
|
||||
" }\n"
|
||||
" if (PS_COLCLIP > 0)\n"
|
||||
" {\n"
|
||||
" // FIXME !!!!\n"
|
||||
" //c.rgb *= c.rgb < 128./255;\n"
|
||||
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
|
||||
" c.rgb *= vec3(factor);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"void fog(vec4 c, float f)\n"
|
||||
"{\n"
|
||||
" vec4 c_out = c;\n"
|
||||
" if(PS_FOG != 0)\n"
|
||||
" {\n"
|
||||
" c_out.rgb = mix(FogColor, c.rgb, f);\n"
|
||||
" c.rgb = mix(FogColor, c.rgb, f);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" return c_out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 ps_color()\n"
|
||||
|
@ -1232,19 +1351,9 @@ static const char* tfx_glsl =
|
|||
"\n"
|
||||
" atst(c);\n"
|
||||
"\n"
|
||||
" c = fog(c, PSin_t.z);\n"
|
||||
" fog(c, PSin_t.z);\n"
|
||||
"\n"
|
||||
" if (PS_COLCLIP == 2)\n"
|
||||
" {\n"
|
||||
" c.rgb = 256.0f/255.0f - c.rgb;\n"
|
||||
" }\n"
|
||||
" if (PS_COLCLIP > 0)\n"
|
||||
" {\n"
|
||||
" // FIXME !!!!\n"
|
||||
" //c.rgb *= c.rgb < 128./255;\n"
|
||||
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
|
||||
" c.rgb *= vec3(factor);\n"
|
||||
" }\n"
|
||||
" colclip(c);\n"
|
||||
"\n"
|
||||
" if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
|
||||
" {\n"
|
||||
|
|
|
@ -656,6 +656,77 @@ void datst()
|
|||
#endif
|
||||
}
|
||||
|
||||
// Note: the explicit layout(location = ...) / layout(index = ...) qualifiers used below might require GL 4.3
|
||||
#ifdef SUBROUTINE_GL40
|
||||
// Function pointer type
|
||||
subroutine void AlphaTestType(vec4 c);
|
||||
|
||||
// a function pointer variable
|
||||
layout(location = 0) subroutine uniform AlphaTestType atst;
|
||||
|
||||
// The functions that can be attached to AlphaTestType
|
||||
// Alpha-test subroutine (explicit index 0): unconditionally drops the fragment.
// The colour argument is unused; it is only there to match AlphaTestType.
layout(index = 0) subroutine(AlphaTestType)
void atest_never(vec4 c) {
    discard;
}
|
||||
|
||||
// Alpha-test subroutine (explicit index 1): never discards, so every fragment passes.
// The colour argument is unused; it is only there to match AlphaTestType.
layout(index = 1) subroutine(AlphaTestType)
void atest_always(vec4 c) {
    return; // intentionally empty: the fragment is kept as-is
}
|
||||
|
||||
// Alpha-test subroutine (explicit index 2).
// Scales the fragment alpha by 255 and truncates it, then discards the
// fragment when that value is greater than AREF - 0.5. The test is skipped
// entirely (fragment always kept) when PS_SPRITEHACK is non-zero.
// AREF and PS_SPRITEHACK are defined outside this block.
layout(index = 2) subroutine(AlphaTestType)
void atest_l(vec4 c)
{
    // The 0.01 bias presumably absorbs rounding error before truncation.
    float alpha = trunc(c.a * 255.0 + 0.01);
    bool rejected = (AREF - alpha - 0.5f) < 0.0f;

    if (PS_SPRITEHACK == 0 && rejected) {
        discard;
    }
}
|
||||
|
||||
// Alpha-test subroutine (explicit index 3).
// Scales the fragment alpha by 255 and truncates it, then discards the
// fragment when that value is greater than AREF + 0.5.
// AREF is defined outside this block.
layout(index = 3) subroutine(AlphaTestType)
void atest_le(vec4 c)
{
    // The 0.01 bias presumably absorbs rounding error before truncation.
    float alpha = trunc(c.a * 255.0 + 0.01);
    bool rejected = (AREF - alpha + 0.5f) < 0.0f;

    if (rejected) {
        discard;
    }
}
|
||||
|
||||
// Alpha-test subroutine (explicit index 4).
// Scales the fragment alpha by 255 and truncates it, then discards the
// fragment when that value differs from AREF by more than 0.5.
// AREF is defined outside this block.
layout(index = 4) subroutine(AlphaTestType)
void atest_e(vec4 c)
{
    // The 0.01 bias presumably absorbs rounding error before truncation.
    float alpha = trunc(c.a * 255.0 + 0.01);
    bool rejected = (0.5f - abs(alpha - AREF)) < 0.0f;

    if (rejected) {
        discard;
    }
}
|
||||
|
||||
// Alpha-test subroutine (explicit index 5).
// Scales the fragment alpha by 255 and truncates it, then discards the
// fragment when that value is less than AREF - 0.5.
// AREF is defined outside this block.
layout(index = 5) subroutine(AlphaTestType)
void atest_ge(vec4 c)
{
    // The 0.01 bias presumably absorbs rounding error before truncation.
    float alpha = trunc(c.a * 255.0 + 0.01);
    bool rejected = (alpha-AREF + 0.5f) < 0.0f;

    if (rejected) {
        discard;
    }
}
|
||||
|
||||
// Alpha-test subroutine (explicit index 6).
// Scales the fragment alpha by 255 and truncates it, then discards the
// fragment when that value is less than AREF + 0.5.
// AREF is defined outside this block.
layout(index = 6) subroutine(AlphaTestType)
void atest_g(vec4 c)
{
    // The 0.01 bias presumably absorbs rounding error before truncation.
    float alpha = trunc(c.a * 255.0 + 0.01);
    bool rejected = (alpha-AREF - 0.5f) < 0.0f;

    if (rejected) {
        discard;
    }
}
|
||||
|
||||
// Alpha-test subroutine (explicit index 7).
// Scales the fragment alpha by 255 and truncates it, then discards the
// fragment when that value lies within 0.5 of AREF.
// AREF is defined outside this block.
layout(index = 7) subroutine(AlphaTestType)
void atest_ne(vec4 c)
{
    // The 0.01 bias presumably absorbs rounding error before truncation.
    float alpha = trunc(c.a * 255.0 + 0.01);
    bool rejected = (abs(alpha - AREF) - 0.5f) < 0.0f;

    if (rejected) {
        discard;
    }
}
|
||||
|
||||
#else
|
||||
void atst(vec4 c)
|
||||
{
|
||||
float a = trunc(c.a * 255.0 + 0.01);
|
||||
|
@ -700,16 +771,64 @@ void atst(vec4 c)
|
|||
discard;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
vec4 fog(vec4 c, float f)
|
||||
// Note: the explicit layout(location = ...) / layout(index = ...) qualifiers used below might require GL 4.3
|
||||
#ifdef SUBROUTINE_GL40
|
||||
// Function pointer type
|
||||
subroutine void ColClipType(inout vec4 c);
|
||||
|
||||
// a function pointer variable
|
||||
layout(location = 1) subroutine uniform ColClipType colclip;
|
||||
|
||||
// Colour-clip subroutine (explicit index 8): leaves the colour untouched.
// c is declared inout only to match ColClipType.
layout(index = 8) subroutine(ColClipType)
void colclip_0(inout vec4 c) {
    return; // intentionally empty: no clipping requested
}
|
||||
|
||||
// Colour-clip subroutine (explicit index 9).
// Zeroes every RGB channel that is not strictly below 128/255 and keeps the
// remaining channels unchanged, i.e. the GLSL form of the commented-out
// reference expression `c.rgb *= c.rgb < 128./255`.
// c: colour modified in place; the alpha channel is not touched.
layout(index = 9) subroutine(ColClipType)
void colclip_1(inout vec4 c)
{
    // Constructing a bvec3 from non-zero floats always yields (true, true, true),
    // so the former bvec3(128/255, ...) mask multiplied by 1.0 and clipped nothing.
    // lessThan() performs the intended per-channel comparison.
    bvec3 keep = lessThan(c.rgb, vec3(128.0f/255.0f));
    c.rgb *= vec3(keep);
}
|
||||
|
||||
// Colour-clip subroutine (explicit index 10).
// First replaces each RGB channel with (256/255 - channel), then zeroes every
// resulting channel that is not strictly below 128/255, i.e. the GLSL form of
// the commented-out reference expression `c.rgb *= c.rgb < 128./255`.
// c: colour modified in place; the alpha channel is not touched.
layout(index = 10) subroutine(ColClipType)
void colclip_2(inout vec4 c)
{
    c.rgb = 256.0f/255.0f - c.rgb;

    // Constructing a bvec3 from non-zero floats always yields (true, true, true),
    // so the former bvec3(128/255, ...) mask multiplied by 1.0 and clipped nothing.
    // lessThan() performs the intended per-channel comparison.
    bvec3 keep = lessThan(c.rgb, vec3(128.0f/255.0f));
    c.rgb *= vec3(keep);
}
|
||||
|
||||
#else
|
||||
// Colour clipping selected at compile time through PS_COLCLIP (variant used
// when shader subroutines are not enabled).
//   PS_COLCLIP == 2 : each RGB channel is first replaced by (256/255 - channel).
//   PS_COLCLIP  > 0 : every RGB channel that is not strictly below 128/255 is
//                     zeroed, i.e. the GLSL form of the commented-out reference
//                     expression `c.rgb *= c.rgb < 128./255`.
// With PS_COLCLIP == 0 the colour is left unchanged.
// c: colour modified in place; the alpha channel is not touched.
void colclip(inout vec4 c)
{
    if (PS_COLCLIP == 2)
    {
        c.rgb = 256.0f/255.0f - c.rgb;
    }
    if (PS_COLCLIP > 0)
    {
        // Constructing a bvec3 from non-zero floats always yields (true, true, true),
        // so the former bvec3(128/255, ...) mask multiplied by 1.0 and clipped nothing.
        // lessThan() performs the intended per-channel comparison.
        bvec3 keep = lessThan(c.rgb, vec3(128.0f/255.0f));
        c.rgb *= vec3(keep);
    }
}
|
||||
#endif
|
||||
|
||||
// Applies fog to the colour when PS_FOG is non-zero; otherwise does nothing.
// c: colour updated in place. It must be `inout`: a plain (by-value) parameter
//    would only modify a local copy and a statement call such as `fog(c, z);`
//    would have no effect on the caller's colour.
// f: blend factor handed to mix(); f == 1.0 keeps c.rgb, f == 0.0 yields FogColor.
// FogColor and PS_FOG are defined outside this block.
void fog(inout vec4 c, float f)
{
    if (PS_FOG != 0)
    {
        c.rgb = mix(FogColor, c.rgb, f);
    }
}
|
||||
|
||||
vec4 ps_color()
|
||||
|
@ -722,19 +841,9 @@ vec4 ps_color()
|
|||
|
||||
atst(c);
|
||||
|
||||
c = fog(c, PSin_t.z);
|
||||
fog(c, PSin_t.z);
|
||||
|
||||
if (PS_COLCLIP == 2)
|
||||
{
|
||||
c.rgb = 256.0f/255.0f - c.rgb;
|
||||
}
|
||||
if (PS_COLCLIP > 0)
|
||||
{
|
||||
// FIXME !!!!
|
||||
//c.rgb *= c.rgb < 128./255;
|
||||
bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
|
||||
c.rgb *= vec3(factor);
|
||||
}
|
||||
colclip(c);
|
||||
|
||||
if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue