gsdx ogl: Flush various pending work

* Try to use more subroutines on VS & PS; unfortunately this hits a driver crash!
* Call Attach/DetachContext through GSDevice so I can unmap the currently mapped buffers
* Implement the GLSL part of GL_ARB_bindless_texture; again, this hits another driver crash!
* Various fixes for GL_ARB_buffer_storage. A basic benchmark shows an improvement only in the 'cold' case; I guess it will improve smoothness
* Try to fix GL_ARB_clear_texture, no success so far. It seems the extension is limited to basic textures (i.e. no depth/stencil)



git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5752 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2013-10-24 20:54:27 +00:00
parent 178563a4b6
commit e80b002929
22 changed files with 762 additions and 122 deletions

View File

@ -248,9 +248,12 @@ namespace GLLoader {
#endif
if (ext.compare("GL_ARB_explicit_uniform_location") == 0) found_GL_ARB_explicit_uniform_location = true;
#ifdef GL44 // Need to debug the code first
// Need to check the clean (in particular of depth/stencil texture)
if (ext.compare("GL_ARB_clear_texture") == 0) found_GL_ARB_clear_texture = true;
if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true;
// FIXME unattach context case + perf
if (ext.compare("GL_ARB_buffer_storage") == 0) found_GL_ARB_buffer_storage = true;
// OK but no apitrace support
if (ext.compare("GL_ARB_multi_bind") == 0) found_GL_ARB_multi_bind = true;
#endif
#ifdef GLBINDLESS // Need to debug the code first
if (ext.compare("GL_ARB_bindless_texture") == 0) found_GL_ARB_bindless_texture = true;

View File

@ -65,6 +65,7 @@ namespace GLState {
GLuint vs = 0;
GLuint program = 0;
bool dirty_prog = false;
bool dirty_subroutine_vs = false;
bool dirty_subroutine_ps = false;
#if 0
struct {
@ -119,6 +120,7 @@ namespace GLState {
vs = 0;
program = 0;
dirty_prog = false;
dirty_subroutine_vs = false;
dirty_subroutine_ps = false;
dirty_ressources = false;
}

View File

@ -66,6 +66,7 @@ namespace GLState {
extern GLuint vs;
extern GLuint program; // monolith program (when sso isn't supported)
extern bool dirty_prog;
extern bool dirty_subroutine_vs;
extern bool dirty_subroutine_ps;
extern bool dirty_ressources;

View File

@ -541,13 +541,13 @@ EXPORT_C GSreadFIFO(uint8* mem)
#ifdef ENABLE_OGL_DEBUG
if (theApp.GetConfig("renderer", 0) / 3 == 4) fprintf(stderr, "Disable FIFO1 on opengl\n");
#endif
s_gs->m_wnd->AttachContext();
s_gs->m_dev->AttachContext();
#endif
s_gs->ReadFIFO(mem, 1);
#ifdef ENABLE_OGL_MT_HACK
s_gs->m_wnd->DetachContext();
s_gs->m_dev->DetachContext();
#endif
}
catch (GSDXRecoverableError)
@ -562,13 +562,13 @@ EXPORT_C GSreadFIFO2(uint8* mem, uint32 size)
#ifdef ENABLE_OGL_MT_HACK
// FIXME called from EE core thread not MTGS which cause
// invalidate data for opengl
s_gs->m_wnd->AttachContext();
s_gs->m_dev->AttachContext();
#endif
s_gs->ReadFIFO(mem, size);
#ifdef ENABLE_OGL_MT_HACK
s_gs->m_wnd->DetachContext();
s_gs->m_dev->DetachContext();
#endif
}
catch (GSDXRecoverableError)
@ -642,7 +642,7 @@ EXPORT_C GSvsync(int field)
#endif
#ifdef ENABLE_OGL_MT_HACK
s_gs->m_wnd->AttachContext();
s_gs->m_dev->AttachContext();
#endif
s_gs->VSync(field);
}

View File

@ -140,6 +140,10 @@ public:
virtual void PSSetShaderResource(int i, GSTexture* sr) {}
virtual void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) {}
// Used for opengl multithread hack
virtual void AttachContext() {}
virtual void DetachContext() {}
GSTexture* GetCurrent();
void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c);
@ -179,4 +183,4 @@ struct GSAdapter
#ifdef _LINUX
// TODO
#endif
};
};

View File

@ -361,10 +361,31 @@ void GSDeviceOGL::Flip()
#endif
}
// Used for the OpenGL multithread hack: forwards the attach request to the
// window that owns the GL context. Does nothing when no window is set.
void GSDeviceOGL::AttachContext()
{
	if (!m_window) return;

	m_window->AttachContext();
}
// Used for the OpenGL multithread hack: releases the GL context through the
// window, after unmapping the PBO pool when GL_ARB_buffer_storage is in use.
void GSDeviceOGL::DetachContext()
{
	// The unmap must happen before the context is detached.
	if (GLLoader::found_GL_ARB_buffer_storage) {
		PboPool::UnmapAll();
	}

	if (!m_window) return;

	m_window->DetachContext();
}
void GSDeviceOGL::BeforeDraw()
{
m_shader->UseProgram();
#ifdef _DEBUG
ASSERT(gl_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
#endif
//#ifdef ENABLE_OGL_STENCIL_DEBUG
// if (m_date.t)
// static_cast<GSTextureOGL*>(m_date.t)->Save(format("/tmp/date_before_%04ld.csv", g_draw_count));
@ -408,7 +429,16 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
{
if (GLLoader::found_GL_ARB_clear_texture) {
static_cast<GSTextureOGL*>(t)->Clear((const void*)&c);
if (static_cast<GSTextureOGL*>(t)->IsBackbuffer()) {
glDisable(GL_SCISSOR_TEST);
OMSetFBO(0);
// glDrawBuffer(GL_BACK); // this is the default when there is no FB
// 0 will select the first drawbuffer ie GL_BACK
gl_ClearBufferfv(GL_COLOR, 0, c.v);
glEnable(GL_SCISSOR_TEST);
} else {
static_cast<GSTextureOGL*>(t)->Clear((const void*)&c);
}
} else {
glDisable(GL_SCISSOR_TEST);
if (static_cast<GSTextureOGL*>(t)->IsBackbuffer()) {
@ -455,10 +485,20 @@ void GSDeviceOGL::ClearRenderTarget_ui(GSTexture* t, uint32 c)
void GSDeviceOGL::ClearDepth(GSTexture* t, float c)
{
// TODO is it possible with GL44 ClearTexture?
// It is seriously not clear if we can clear only the depth
if (GLLoader::found_GL_ARB_clear_texture) {
gl_ClearTexImage(static_cast<GSTextureOGL*>(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_FLOAT, &c);
// TODO is it possible with GL44 ClearTexture? no the API is garbage!
// Anyway, stencil can be cleared to 0 (it will be only used for date)
if (0 && GLLoader::found_GL_ARB_clear_texture) {
static_cast<GSTextureOGL*>(t)->EnableUnit();
// Yes a very nice API to mix float and integer
struct clear {
float depth;
GLuint stencil;
} clear;
clear.depth = c;
clear.stencil = 0;
gl_ClearTexImage(static_cast<GSTextureOGL*>(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, &clear);
} else {
OMSetFBO(m_fbo);
OMSetWriteBuffer();
@ -478,9 +518,9 @@ void GSDeviceOGL::ClearDepth(GSTexture* t, float c)
void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
{
// TODO is it possible with GL44 ClearTexture?
// It is seriously not clear if we can clear only the stencil
// TODO is it possible with GL44 ClearTexture? no the API is garbage!
if (GLLoader::found_GL_ARB_clear_texture) {
static_cast<GSTextureOGL*>(t)->EnableUnit();
gl_ClearTexImage(static_cast<GSTextureOGL*>(t)->GetID(), 0, GL_DEPTH_STENCIL, GL_BYTE, &c);
} else {
OMSetFBO(m_fbo);
@ -558,10 +598,10 @@ void GSDeviceOGL::BindDateTexture()
{
// TODO: multibind?
// GLuint textures[1] = {static_cast<GSTextureOGL*>(m_date.t)->GetID()};
// gl_BindImageTextures(0, 1, textures);
//gl_BindImageTexture(0, 0, 0, true, 0, GL_READ_WRITE, GL_R32I);
// gl_BindImageTextures(2, 1, textures);
//gl_BindImageTexture(2, 0, 0, true, 0, GL_READ_WRITE, GL_R32I);
gl_BindImageTexture(0, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
gl_BindImageTexture(2, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
}
void GSDeviceOGL::RecycleDateTexture()
@ -1196,7 +1236,7 @@ void GSDeviceOGL::DebugOutputToFile(unsigned int source, unsigned int type, unsi
fprintf(f,"Type:%s\tID:%d\tSeverity:%s\tMessage:%s\n", debType, g_draw_count, debSev,message);
fclose(f);
}
//if (sev_counter > 2) assert(0);
ASSERT(sev_counter < 3);
#endif
}

View File

@ -260,9 +260,10 @@ class GSDeviceOGL : public GSDevice
struct
{
uint32 bppz:2;
uint32 logz:1;
// Next param will be handle by subroutine
uint32 tme:1;
uint32 fst:1;
uint32 logz:1;
};
uint32 key;
@ -333,24 +334,26 @@ class GSDeviceOGL : public GSDevice
struct
{
uint32 fst:1;
uint32 wms:2;
uint32 wmt:2;
uint32 fmt:3;
uint32 aem:1;
uint32 tfx:3;
uint32 tcc:1;
uint32 atst:3;
uint32 fog:1;
uint32 clr1:1;
uint32 fba:1;
uint32 aout:1;
uint32 ltf:1;
uint32 colclip:2;
uint32 date:2;
uint32 spritehack:1;
uint32 tcoffsethack:1;
uint32 point_sampler:1;
uint32 iip:1;
// Next param will be handle by subroutine
uint32 colclip:2;
uint32 atst:3;
uint32 tfx:3;
uint32 tcc:1;
uint32 wms:2;
uint32 wmt:2;
uint32 ltf:1;
};
uint32 key;
@ -538,7 +541,7 @@ class GSDeviceOGL : public GSDevice
GSDeviceOGL();
virtual ~GSDeviceOGL();
void CheckDebugLog();
static void CheckDebugLog();
static void DebugOutputToFile(unsigned int source, unsigned int type, unsigned int id, unsigned int severity, const char* message);
bool HasStencil() { return true; }
@ -548,6 +551,9 @@ class GSDeviceOGL : public GSDevice
bool Reset(int w, int h);
void Flip();
void SetVSync(bool enable);
// Used for opengl multithread hack
void AttachContext();
void DetachContext();
void DrawPrimitive();
void DrawIndexedPrimitive();

View File

@ -423,7 +423,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
ps_sel.spritehack = tex->m_spritehack_t;
// FIXME the ati is currently disabled on the shader. I need to find a .gs to test that we got same
// bug on opengl
ps_sel.point_sampler = !(bilinear && simple_sample);
// FIXME for the moment disable it on subroutine (it will kill my perf for nothings)
ps_sel.point_sampler = !(bilinear && simple_sample) && !GLLoader::found_GL_ARB_shader_subroutine;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();

View File

@ -25,9 +25,11 @@
GSShaderOGL::GSShaderOGL(bool debug) :
m_debug_shader(debug),
m_sub_count(0)
m_vs_sub_count(0),
m_ps_sub_count(0)
{
memset(&m_vs_sub, 0, countof(m_vs_sub)*sizeof(m_vs_sub[0]));
memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(m_ps_sub[0]));
m_single_prog.clear();
@ -50,12 +52,15 @@ GSShaderOGL::~GSShaderOGL()
m_single_prog.clear();
}
void GSShaderOGL::VS(GLuint s)
void GSShaderOGL::VS(GLuint s, GLuint sub_count)
{
if (GLState::vs != s)
{
m_vs_sub_count = sub_count;
GLState::vs = s;
GLState::dirty_prog = true;
GLState::dirty_subroutine_vs = true;
#ifndef ENABLE_GLES
if (GLLoader::found_GL_ARB_separate_shader_objects)
gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s);
@ -63,11 +68,23 @@ void GSShaderOGL::VS(GLuint s)
}
}
// Records the vertex-shader subroutine index held in sub[0]. The VS
// subroutine state is flagged dirty only when the index actually changes.
void GSShaderOGL::VS_subroutine(GLuint *sub)
{
	if (m_vs_sub[0] == sub[0]) return;

	m_vs_sub[0] = sub[0];
	GLState::dirty_subroutine_vs = true;
}
void GSShaderOGL::PS_subroutine(GLuint *sub)
{
if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1])) {
// FIXME could be more efficient with GSvector
if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1] && m_ps_sub[2] == sub[2] && m_ps_sub[3] == sub[3] && m_ps_sub[4] == sub[4])) {
m_ps_sub[0] = sub[0];
m_ps_sub[1] = sub[1];
m_ps_sub[2] = sub[2];
m_ps_sub[3] = sub[3];
m_ps_sub[4] = sub[4];
GLState::dirty_subroutine_ps = true;
}
}
@ -85,7 +102,7 @@ void GSShaderOGL::PS(GLuint s, GLuint sub_count)
{
if (GLState::ps != s)
{
m_sub_count = sub_count;
m_ps_sub_count = sub_count;
GLState::ps = s;
GLState::dirty_prog = true;
@ -185,10 +202,14 @@ void GSShaderOGL::SetupUniform()
void GSShaderOGL::SetupSubroutineUniform()
{
if (!GLLoader::found_GL_ARB_shader_subroutine) return;
if (m_sub_count == 0) return;
if (GLState::dirty_subroutine_ps) {
gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_sub_count, m_ps_sub);
if (GLState::dirty_subroutine_vs && m_vs_sub_count) {
gl_UniformSubroutinesuiv(GL_VERTEX_SHADER, m_vs_sub_count, m_vs_sub);
GLState::dirty_subroutine_vs = false;
}
if (GLState::dirty_subroutine_ps && m_ps_sub_count) {
gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_ps_sub_count, m_ps_sub);
GLState::dirty_subroutine_ps = false;
}
}
@ -280,6 +301,7 @@ void GSShaderOGL::UseProgram()
{
if (GLState::dirty_prog) {
if (!GLLoader::found_GL_ARB_separate_shader_objects) {
GLState::dirty_subroutine_vs = true;
GLState::dirty_subroutine_ps = true;
GLState::dirty_ressources = true;
@ -355,17 +377,26 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
// Need GL version 400
header += "#define SUBROUTINE_GL40 1\n";
header += "#extension GL_ARB_shader_subroutine: require\n";
}
if (GLLoader::found_GL_ARB_explicit_uniform_location) {
// Need GL version 430
header += "#extension GL_ARB_explicit_uniform_location: require\n";
}
#ifdef ENABLE_OGL_STENCIL_DEBUG
header += "#define ENABLE_OGL_STENCIL_DEBUG 1\n";
#endif
if (GLLoader::found_GL_ARB_shader_image_load_store)
if (GLLoader::found_GL_ARB_shader_image_load_store) {
// Need GL version 420
header += "#extension GL_ARB_shader_image_load_store: require\n";
else
} else {
header += "#define DISABLE_GL42_image\n";
}
if (GLLoader::found_GL_ARB_bindless_texture && GLLoader::found_GL_ARB_explicit_uniform_location) {
// Future opengl 5?
header += "#extension GL_ARB_bindless_texture: require\n";
header += "#define ENABLE_BINDLESS_TEX\n";
}
#else
header = "#version 300 es\n";

View File

@ -25,9 +25,11 @@ class GSShaderOGL {
GLuint m_pipeline;
hash_map<uint64, GLuint > m_single_prog;
const bool m_debug_shader;
GLuint m_sub_count;
GLuint m_vs_sub_count;
GLuint m_ps_sub_count;
GLuint m_ps_sub[2];
GLuint m_vs_sub[1];
GLuint m_ps_sub[5];
void SetupSubroutineUniform();
void SetupUniform();
@ -51,7 +53,8 @@ class GSShaderOGL {
void PS(GLuint s, GLuint sub_count = 0);
void PS_subroutine(GLuint *sub);
void PS_ressources(GLuint64 handle[2]);
void VS(GLuint s);
void VS(GLuint s, GLuint sub_count = 0);
void VS_subroutine(GLuint *sub);
void UseProgram();

View File

@ -1287,7 +1287,7 @@ void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r)
case 1: // local -> host
m_tr.Init(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY);
#ifdef ENABLE_OGL_MT_HACK
s_gs->m_wnd->DetachContext();
s_gs->m_dev->DetachContext();
#endif
break;
case 2: // local -> local
@ -1794,7 +1794,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
{
GSPerfMonAutoTimer pmat(&m_perfmon);
#ifdef ENABLE_OGL_MT_HACK
s_gs->m_wnd->AttachContext();
s_gs->m_dev->AttachContext();
#endif
const uint8* start = mem;

View File

@ -136,9 +136,16 @@ void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer*
void GSDeviceOGL::SetupVS(VSSelector sel)
{
GLuint vs = m_vs[sel];
if (GLLoader::found_GL_ARB_shader_subroutine) {
GLuint sub[1];
sub[0] = sel.tme ? 1 + (uint32)sel.fst : 0;
m_shader->VS_subroutine(sub);
// Handle by subroutine useless now
sel.tme = 0;
sel.fst = 0;
}
m_shader->VS(vs);
m_shader->VS(m_vs[sel], 1);
}
void GSDeviceOGL::SetupGS(bool enable)
@ -152,11 +159,35 @@ void GSDeviceOGL::SetupGS(bool enable)
void GSDeviceOGL::SetupPS(PSSelector sel)
{
if (GLLoader::found_GL_ARB_shader_subroutine) {
GLuint sub[2] = {sel.atst, (uint32)sel.colclip + 8};
GLuint tfx = sel.tfx > 3 ? 19 : 11 + (uint32)sel.tfx + (uint32)sel.tcc*4;
GLuint colclip = 8 + (uint32)sel.colclip;
GLuint clamp =
(sel.wms == 2 && sel.wmt == 2) ? 20 :
(sel.wms == 2) ? 21 :
(sel.wmt == 2) ? 22 : 23;
GLuint wrap =
(sel.wms == 2 && sel.wmt == 2) ? 24 :
(sel.wms == 3 && sel.wmt == 3) ? 25 :
(sel.wms == 2 && sel.wmt == 3) ? 26 :
(sel.wms == 3 && sel.wmt == 2) ? 27 :
(sel.wms == 2) ? 28 :
(sel.wmt == 3) ? 29 :
(sel.wms == 3) ? 30 :
(sel.wmt == 2) ? 31 : 32;
GLuint sub[5] = {sel.atst, colclip, tfx, clamp, wrap};
m_shader->PS_subroutine(sub);
// Handle by subroutine useless now
sel.atst = 0;
sel.colclip = 0;
sel.tfx = 0;
sel.tcc = 0;
// sel.wms = 0;
// sel.wmt = 0;
}
// *************************************************************
@ -175,7 +206,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel)
// *************************************************************
// Dynamic
// *************************************************************
m_shader->PS(ps, 2);
m_shader->PS(ps, 3);
}
void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel)

View File

@ -24,10 +24,7 @@
#include "GSTextureOGL.h"
#include "GLState.h"
// Flush need bind/unbind
// Barrier might sync much more
#define BARRIER_INSTEAD_FLUSH
// FIXME OGL4: investigate, only 1 unpack buffer always bound
namespace PboPool {
GLuint m_pool[PBO_POOL_SIZE];
@ -46,11 +43,12 @@ namespace PboPool {
if (GLLoader::found_GL_ARB_buffer_storage) {
gl_BufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT | GL_CLIENT_STORAGE_BIT);
} else {
gl_BufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_DRAW);
m_offset[m_current_pbo] = 0;
m_map[m_current_pbo] = NULL;
gl_BufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_COPY);
}
m_offset[m_current_pbo] = 0;
m_map[m_current_pbo] = NULL;
NextPbo();
}
UnbindPbo();
@ -60,11 +58,7 @@ namespace PboPool {
if (m_map[m_current_pbo] != NULL) return;
// FIXME I'm not sure it is allowed to map another buffer after we get a pointer
#ifdef BARRIER_INSTEAD_FLUSH
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT;
#else
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
#endif
for (size_t i = 0; i < countof(m_pool); i++) {
BindPbo();
m_map[m_current_pbo] = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, flags);
@ -107,10 +101,15 @@ namespace PboPool {
m_offset[m_current_pbo] = 0;
}
// Note: it still need it because texsubimage will access currently bound buffer
// Pbo ready let's get a pointer
BindPbo();
return m_map[m_current_pbo] + m_offset[m_current_pbo];
}
}
// FIXME: unmap buffer when the context is dettached (not sure it is required actually)
void UnmapAll() {
if (m_map[m_current_pbo] == NULL) return;
@ -125,14 +124,7 @@ namespace PboPool {
void Unmap() {
if (GLLoader::found_GL_ARB_buffer_storage) {
// GL4.4 do a glMemoryBarrier? or glFlushMappedBufferRange?
#ifdef BARRIER_INSTEAD_FLUSH
gl_MemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
#else
BindPbo();
gl_FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size);
UnbindPbo();
#endif
} else {
gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
}
@ -276,7 +268,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read)
m_pbo_size = (m_size.x * m_size.y) << m_int_shift;
gl_BindBuffer(GL_PIXEL_PACK_BUFFER, m_pbo_id);
gl_BufferData(GL_PIXEL_PACK_BUFFER, m_pbo_size, NULL, GL_STREAM_DRAW);
gl_BufferData(GL_PIXEL_PACK_BUFFER, m_pbo_size, NULL, GL_STREAM_READ);
gl_BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
case GSTexture::DepthStencil:
@ -310,7 +302,8 @@ GSTextureOGL::~GSTextureOGL()
void GSTextureOGL::Clear(const void *data)
{
gl_ClearTexImage(m_texture_id, 0, m_format, m_int_type, data);
EnableUnit();
gl_ClearTexImage(m_texture_id, 0, m_int_format, m_int_type, data);
}
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
@ -340,8 +333,9 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());
if (!GLLoader::found_GL_ARB_buffer_storage)
PboPool::UnbindPbo();
// FIXME OGL4: investigate, only 1 unpack buffer always bound
//if (!GLLoader::found_GL_ARB_buffer_storage)
PboPool::UnbindPbo();
PboPool::EndTransfer();

View File

@ -110,7 +110,7 @@ void GSWndGL::PopulateGlFunction()
// GL4.3
*(void**)&(gl_CopyImageSubData) = GetProcAddress("glCopyImageSubData", true);
// GL4.4
*(void**)&(gl_ClearTexImage) = GetProcAddress("glCLearTexImage", true);
*(void**)&(gl_ClearTexImage) = GetProcAddress("glClearTexImage", true);
*(void**)&(gl_BindTextures) = GetProcAddress("glBindTextures", true);
*(void**)&(gl_BufferStorage) = GetProcAddress("glBufferStorage", true);
// GL_ARB_bindless_texture (GL5?)

View File

@ -36,7 +36,7 @@ int main ( int argc, char *argv[] )
void *handle = dlopen(argv[1], RTLD_LAZY|RTLD_GLOBAL);
if (handle == NULL) {
fprintf(stderr, "Failed to open plugin %s\n", argv[1]);
fprintf(stderr, "Failed to dlopen plugin %s\n", argv[1]);
help();
}

View File

@ -96,11 +96,15 @@ layout(location = 0) out uint SV_Target1;
layout(location = 0) out vec4 SV_Target0;
#endif
#ifdef ENABLE_BINDLESS_TEX
layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
#else
#ifdef DISABLE_GL42
uniform sampler2D TextureSampler;
#else
layout(binding = 0) uniform sampler2D TextureSampler;
#endif
#endif
vec4 sample_c()
{

View File

@ -81,11 +81,15 @@ layout(std140, binding = 13) uniform cb13
vec4 _rcpFrameOpt;
};
#ifdef ENABLE_BINDLESS_TEX
layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
#else
#ifdef DISABLE_GL42
uniform sampler2D TextureSampler;
#else
layout(binding = 0) uniform sampler2D TextureSampler;
#endif
#endif
#if !GL_ES && __VERSION__ > 140

View File

@ -121,11 +121,15 @@ static const char* convert_glsl =
"layout(location = 0) out vec4 SV_Target0;\n"
"#endif\n"
"\n"
"#ifdef ENABLE_BINDLESS_TEX\n"
"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
"#else\n"
"#ifdef DISABLE_GL42\n"
"uniform sampler2D TextureSampler;\n"
"#else\n"
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"#endif\n"
"#endif\n"
"\n"
"vec4 sample_c()\n"
"{\n"
@ -296,11 +300,15 @@ static const char* interlace_glsl =
" float hH;\n"
"};\n"
"\n"
"#ifdef ENABLE_BINDLESS_TEX\n"
"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
"#else\n"
"#ifdef DISABLE_GL42\n"
"uniform sampler2D TextureSampler;\n"
"#else\n"
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"#endif\n"
"#endif\n"
"\n"
"// TODO ensure that clip (discard) is < 0 and not <= 0 ???\n"
"void ps_main0()\n"
@ -389,11 +397,15 @@ static const char* merge_glsl =
" vec4 BGColor;\n"
"};\n"
"\n"
"#ifdef ENABLE_BINDLESS_TEX\n"
"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
"#else\n"
"#ifdef DISABLE_GL42\n"
"uniform sampler2D TextureSampler;\n"
"#else\n"
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"#endif\n"
"#endif\n"
"\n"
"void ps_main0()\n"
"{\n"
@ -465,11 +477,15 @@ static const char* shadeboost_glsl =
" vec4 BGColor;\n"
"};\n"
"\n"
"#ifdef ENABLE_BINDLESS_TEX\n"
"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
"#else\n"
"#ifdef DISABLE_GL42\n"
"uniform sampler2D TextureSampler;\n"
"#else\n"
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"#endif\n"
"#endif\n"
"\n"
"// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% \n"
"vec4 ContrastSaturationBrightness(vec4 color)\n"
@ -617,6 +633,60 @@ static const char* tfx_glsl =
"\n"
"const float exp_min32 = exp2(-32.0f);\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"// Function pointer type\n"
"subroutine void TextureCoordType(void);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 0) subroutine uniform TextureCoordType texture_coord;\n"
"\n"
"layout(index = 0) subroutine(TextureCoordType)\n"
"void tme_0()\n"
"{\n"
" VSout_t.xy = vec2(0.0f, 0.0f);\n"
" VSout_t.w = 1.0f;\n"
"}\n"
"\n"
"layout(index = 1) subroutine(TextureCoordType)\n"
"void tme_1_fst_0()\n"
"{\n"
" VSout_t.xy = i_st;\n"
" VSout_t.w = i_q;\n"
"}\n"
"\n"
"layout(index = 2) subroutine(TextureCoordType)\n"
"void tme_1_fst_1()\n"
"{\n"
" VSout_t.xy = vec2(i_uv) * TextureScale;\n"
" VSout_t.w = 1.0f;\n"
"}\n"
"\n"
"#else\n"
"\n"
"void texture_coord()\n"
"{\n"
" if(VS_TME != 0)\n"
" {\n"
" if(VS_FST != 0)\n"
" {\n"
" VSout_t.xy = vec2(i_uv) * TextureScale;\n"
" VSout_t.w = 1.0f;\n"
" }\n"
" else\n"
" {\n"
" VSout_t.xy = i_st;\n"
" VSout_t.w = i_q;\n"
" }\n"
" }\n"
" else\n"
" {\n"
" VSout_t.xy = vec2(0.0f, 0.0f);\n"
" VSout_t.w = 1.0f;\n"
" }\n"
"}\n"
"\n"
"#endif\n"
"\n"
"void vs_main()\n"
"{\n"
" uint z;\n"
@ -642,24 +712,7 @@ static const char* tfx_glsl =
"\n"
" gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n"
"\n"
" if(VS_TME != 0)\n"
" {\n"
" if(VS_FST != 0)\n"
" {\n"
" VSout_t.xy = vec2(i_uv) * TextureScale;\n"
" VSout_t.w = 1.0f;\n"
" }\n"
" else\n"
" {\n"
" VSout_t.xy = i_st;\n"
" VSout_t.w = i_q;\n"
" }\n"
" }\n"
" else\n"
" {\n"
" VSout_t.xy = vec2(0.0f, 0.0f);\n"
" VSout_t.w = 1.0f;\n"
" }\n"
" texture_coord();\n"
"\n"
" VSout_c = i_c;\n"
" VSout_fc = i_c;\n"
@ -804,20 +857,23 @@ static const char* tfx_glsl =
"layout(location = 0, index = 1) out vec4 SV_Target1;\n"
"#endif\n"
"\n"
"#ifdef ENABLE_BINDLESS_TEX\n"
"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
"layout(bindless_sampler, location = 1) uniform sampler2D PaletteSampler;\n"
"#else\n"
"#ifdef DISABLE_GL42\n"
"uniform sampler2D TextureSampler;\n"
"uniform sampler2D PaletteSampler;\n"
"//uniform sampler2D RTCopySampler;\n"
"#else\n"
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"layout(binding = 1) uniform sampler2D PaletteSampler;\n"
"//layout(binding = 2) uniform sampler2D RTCopySampler;\n"
"#endif\n"
"#endif\n"
"\n"
"#ifndef DISABLE_GL42_image\n"
"#if PS_DATE > 0\n"
"// FIXME how to declare memory access\n"
"layout(r32i, binding = 0) coherent uniform iimage2D img_prim_min;\n"
"layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min;\n"
"#endif\n"
"#else\n"
"// use basic stencil\n"
@ -878,6 +934,87 @@ static const char* tfx_glsl =
"}\n"
"#endif\n"
"\n"
"// FIXME crash nvidia\n"
"#if 0\n"
"// Function pointer type\n"
"subroutine vec4 WrapType(vec4 uv);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 4) subroutine uniform WrapType wrapuv;\n"
"\n"
"layout(index = 24) subroutine(WrapType)\n"
"vec4 wrapuv_wms_wmt_2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 25) subroutine(WrapType)\n"
"vec4 wrapuv_wms_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 26) subroutine(WrapType)\n"
"vec4 wrapuv_wms2_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 27) subroutine(WrapType)\n"
"vec4 wrapuv_wms3_wmt2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 28) subroutine(WrapType)\n"
"vec4 wrapuv_wms2_wmtx(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 29) subroutine(WrapType)\n"
"vec4 wrapuv_wmsx_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 30) subroutine(WrapType)\n"
"vec4 wrapuv_wms3_wmtx(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 31) subroutine(WrapType)\n"
"vec4 wrapuv_wmsx_wmt2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 32) subroutine(WrapType)\n"
"vec4 wrapuv_dummy(vec4 uv)\n"
"{\n"
" return uv;\n"
"}\n"
"\n"
"#else\n"
"vec4 wrapuv(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
@ -915,7 +1052,45 @@ static const char* tfx_glsl =
"\n"
" return uv_out;\n"
"}\n"
"#endif\n"
"\n"
"// FIXME crash nvidia\n"
"#if 0\n"
"// Function pointer type\n"
"subroutine vec2 ClampType(vec2 uv);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 3) subroutine uniform ClampType clampuv;\n"
"\n"
"layout(index = 20) subroutine(ClampType)\n"
"vec2 clampuv_wms2_wmt2(vec2 uv)\n"
"{\n"
" return clamp(uv, MinF, MinMax.zw);\n"
"}\n"
"\n"
"layout(index = 21) subroutine(ClampType)\n"
"vec2 clampuv_wms2(vec2 uv)\n"
"{\n"
" vec2 uv_out = uv;\n"
" uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 22) subroutine(ClampType)\n"
"vec2 clampuv_wmt2(vec2 uv)\n"
"{\n"
" vec2 uv_out = uv;\n"
" uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 23) subroutine(ClampType)\n"
"vec2 clampuv_dummy(vec2 uv)\n"
"{\n"
" return uv;\n"
"}\n"
"\n"
"#else\n"
"vec2 clampuv(vec2 uv)\n"
"{\n"
" vec2 uv_out = uv;\n"
@ -935,6 +1110,7 @@ static const char* tfx_glsl =
"\n"
" return uv_out;\n"
"}\n"
"#endif\n"
"\n"
"mat4 sample_4c(vec4 uv)\n"
"{\n"
@ -1043,6 +1219,86 @@ static const char* tfx_glsl =
" return t;\n"
"}\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"// Function pointer type\n"
"subroutine vec4 TfxType(vec4 t, vec4 c);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 2) subroutine uniform TfxType tfx;\n"
"\n"
"layout(index = 11) subroutine(TfxType)\n"
"vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 12) subroutine(TfxType)\n"
"vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = t.rgb;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 13) subroutine(TfxType)\n"
"vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 14) subroutine(TfxType)\n"
"vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 15) subroutine(TfxType)\n"
"vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out = c * t * 255.0f / 128.0f;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 16) subroutine(TfxType)\n"
"vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out = t;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 17) subroutine(TfxType)\n"
"vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" c_out.a += t.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 18) subroutine(TfxType)\n"
"vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" c_out.a = t.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 19) subroutine(TfxType)\n"
"vec4 tfx_dummy(vec4 t, vec4 c)\n"
"{\n"
" return c;\n"
"}\n"
"\n"
"#else\n"
"vec4 tfx(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
@ -1087,8 +1343,10 @@ static const char* tfx_glsl =
" }\n"
" }\n"
"\n"
" return clamp(c_out, vec4(0.0f, 0.0f, 0.0f, 0.0f), vec4(1.0f, 1.0f, 1.0f, 1.0f));\n"
" return c_out;\n"
"}\n"
"#endif\n"
"\n"
"\n"
"#if 0\n"
"void datst()\n"
@ -1105,7 +1363,6 @@ static const char* tfx_glsl =
"}\n"
"#endif\n"
"\n"
"// Note layout stuff might require gl4.3\n"
"#ifdef SUBROUTINE_GL40\n"
"// Function pointer type\n"
"subroutine void AlphaTestType(vec4 c);\n"
@ -1113,7 +1370,6 @@ static const char* tfx_glsl =
"// a function pointer variable\n"
"layout(location = 0) subroutine uniform AlphaTestType atst;\n"
"\n"
"// The function attached to AlphaTestType\n"
"layout(index = 0) subroutine(AlphaTestType)\n"
"void atest_never(vec4 c)\n"
"{\n"
@ -1284,10 +1540,12 @@ static const char* tfx_glsl =
"{\n"
" vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n"
"\n"
" vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);\n"
" vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);\n"
"#if PS_IIP == 1\n"
" vec4 c = tfx(t, PSin_c);\n"
" vec4 c = clamp(tfx(t, PSin_c), zero, one);\n"
"#else\n"
" vec4 c = tfx(t, PSin_fc);\n"
" vec4 c = clamp(tfx(t, PSin_fc), zero, one);\n"
"#endif\n"
"\n"
" atst(c);\n"
@ -1457,11 +1715,15 @@ static const char* fxaa_fx =
" vec4 _rcpFrameOpt;\n"
"};\n"
"\n"
"#ifdef ENABLE_BINDLESS_TEX\n"
"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
"#else\n"
"#ifdef DISABLE_GL42\n"
"uniform sampler2D TextureSampler;\n"
"#else\n"
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"#endif\n"
"#endif\n"
"\n"
"#if !GL_ES && __VERSION__ > 140\n"
"\n"

View File

@ -45,11 +45,15 @@ layout(std140, binding = 11) uniform cb11
float hH;
};
#ifdef ENABLE_BINDLESS_TEX
layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
#else
#ifdef DISABLE_GL42
uniform sampler2D TextureSampler;
#else
layout(binding = 0) uniform sampler2D TextureSampler;
#endif
#endif
// TODO ensure that clip (discard) is < 0 and not <= 0 ???
void ps_main0()

View File

@ -44,11 +44,15 @@ layout(std140, binding = 10) uniform cb10
vec4 BGColor;
};
// TextureSampler is declared in one of three ways, chosen at compile time:
//  - ENABLE_BINDLESS_TEX: bindless sampler at explicit uniform location 0
//  - DISABLE_GL42:        plain uniform without any layout qualifier
//  - otherwise:           sampler with an explicit binding = 0 qualifier
#ifdef ENABLE_BINDLESS_TEX
layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
#else
#ifdef DISABLE_GL42
uniform sampler2D TextureSampler;
#else
layout(binding = 0) uniform sampler2D TextureSampler;
#endif
#endif
void ps_main0()
{

View File

@ -50,11 +50,15 @@ layout(std140, binding = 12) uniform cb12
vec4 BGColor;
};
// TextureSampler is declared in one of three ways, chosen at compile time:
//  - ENABLE_BINDLESS_TEX: bindless sampler at explicit uniform location 0
//  - DISABLE_GL42:        plain uniform without any layout qualifier
//  - otherwise:           sampler with an explicit binding = 0 qualifier
#ifdef ENABLE_BINDLESS_TEX
layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
#else
#ifdef DISABLE_GL42
uniform sampler2D TextureSampler;
#else
layout(binding = 0) uniform sampler2D TextureSampler;
#endif
#endif
// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150%
vec4 ContrastSaturationBrightness(vec4 color)

View File

@ -107,6 +107,60 @@ layout(std140, binding = 20) uniform cb20
const float exp_min32 = exp2(-32.0f);
#ifdef SUBROUTINE_GL40
// Function pointer type
subroutine void TextureCoordType(void);
// a function pointer variable
layout(location = 0) subroutine uniform TextureCoordType texture_coord;
// TextureCoordType implementation, subroutine index 0.
// Emits a constant texture coordinate: xy = (0, 0) and w = 1. This is the
// same output as the VS_TME == 0 branch of the non-subroutine texture_coord().
layout(index = 0) subroutine(TextureCoordType)
void tme_0()
{
    VSout_t.w  = 1.0f;
    VSout_t.xy = vec2(0.0f);
}
// TextureCoordType implementation, subroutine index 1.
// Forwards the i_st input as the coordinate and i_q as its w component. This
// is the same output as the VS_TME != 0 / VS_FST == 0 branch of the
// non-subroutine texture_coord().
layout(index = 1) subroutine(TextureCoordType)
void tme_1_fst_0()
{
    VSout_t.w  = i_q;
    VSout_t.xy = i_st;
}
// TextureCoordType implementation, subroutine index 2.
// Converts the i_uv input to float, scales it by TextureScale and forces
// w to 1. This is the same output as the VS_TME != 0 / VS_FST != 0 branch of
// the non-subroutine texture_coord().
layout(index = 2) subroutine(TextureCoordType)
void tme_1_fst_1()
{
    vec2 scaled_uv = vec2(i_uv) * TextureScale;

    VSout_t.xy = scaled_uv;
    VSout_t.w  = 1.0f;
}
#else
// Fallback used when SUBROUTINE_GL40 is not defined: picks how VSout_t is
// filled from the VS_TME / VS_FST selectors instead of dispatching through a
// subroutine uniform.
void texture_coord()
{
    if(VS_TME == 0)
    {
        // No texture: constant coordinate.
        VSout_t.xy = vec2(0.0f, 0.0f);
        VSout_t.w = 1.0f;
    }
    else if(VS_FST == 0)
    {
        // Pass i_st through, together with i_q.
        VSout_t.xy = i_st;
        VSout_t.w = i_q;
    }
    else
    {
        // i_uv input, scaled by TextureScale.
        VSout_t.xy = vec2(i_uv) * TextureScale;
        VSout_t.w = 1.0f;
    }
}
#endif
void vs_main()
{
uint z;
@ -132,24 +186,7 @@ void vs_main()
gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
if(VS_TME != 0)
{
if(VS_FST != 0)
{
VSout_t.xy = vec2(i_uv) * TextureScale;
VSout_t.w = 1.0f;
}
else
{
VSout_t.xy = i_st;
VSout_t.w = i_q;
}
}
else
{
VSout_t.xy = vec2(0.0f, 0.0f);
VSout_t.w = 1.0f;
}
texture_coord();
VSout_c = i_c;
VSout_fc = i_c;
@ -294,20 +331,23 @@ layout(location = 0, index = 0) out vec4 SV_Target0;
layout(location = 0, index = 1) out vec4 SV_Target1;
#endif
// TextureSampler and PaletteSampler are declared in one of three ways,
// chosen at compile time:
//  - ENABLE_BINDLESS_TEX: bindless samplers at explicit uniform locations 0 and 1
//  - DISABLE_GL42:        plain uniforms without any layout qualifier
//  - otherwise:           samplers with explicit binding = 0 and binding = 1
#ifdef ENABLE_BINDLESS_TEX
layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
layout(bindless_sampler, location = 1) uniform sampler2D PaletteSampler;
#else
#ifdef DISABLE_GL42
uniform sampler2D TextureSampler;
uniform sampler2D PaletteSampler;
//uniform sampler2D RTCopySampler;
#else
layout(binding = 0) uniform sampler2D TextureSampler;
layout(binding = 1) uniform sampler2D PaletteSampler;
//layout(binding = 2) uniform sampler2D RTCopySampler;
#endif
#endif
#ifndef DISABLE_GL42_image
#if PS_DATE > 0
// FIXME how to declare memory access
layout(r32i, binding = 0) coherent uniform iimage2D img_prim_min;
layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min;
#endif
#else
// use basic stencil
@ -368,6 +408,87 @@ vec4 sample_rt(vec2 uv)
}
#endif
// FIXME: the subroutine variant below crashes the NVIDIA driver, so it is compiled out (#if 0)
#if 0
// Function pointer type
subroutine vec4 WrapType(vec4 uv);
// a function pointer variable
layout(location = 4) subroutine uniform WrapType wrapuv;
// WrapType implementation, subroutine index 24.
// Clamps all four components: (x, z) against MinMax.x / MinMax.z and
// (y, w) against MinMax.y / MinMax.w.
layout(index = 24) subroutine(WrapType)
vec4 wrapuv_wms_wmt_2(vec4 uv)
{
    return clamp(uv, MinMax.xyxy, MinMax.zwzw);
}
// WrapType implementation, subroutine index 25.
// For all four components: scale by WH, truncate to integer, AND with
// MskFix.xy, OR with MskFix.zw, then divide by WH again.
layout(index = 25) subroutine(WrapType)
vec4 wrapuv_wms_wmt3(vec4 uv)
{
    ivec4 scaled = ivec4(uv * WH.xyxy);
    ivec4 masked = (scaled & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw);

    return vec4(masked) / WH.xyxy;
}
// WrapType implementation, subroutine index 26.
// (x, z) are clamped to [MinMax.x, MinMax.z]; (y, w) go through the
// scale / AND / OR / unscale sequence using the y and w lanes of MskFix.
layout(index = 26) subroutine(WrapType)
vec4 wrapuv_wms2_wmt3(vec4 uv)
{
    vec2 xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
    vec2 yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;

    // Re-interleave the two pairs into x, y, z, w order.
    return vec4(xz.x, yw.x, xz.y, yw.y);
}
// WrapType implementation, subroutine index 27.
// (x, z) go through the scale / AND / OR / unscale sequence using the x and z
// lanes of MskFix; (y, w) are clamped to [MinMax.y, MinMax.w].
layout(index = 27) subroutine(WrapType)
vec4 wrapuv_wms3_wmt2(vec4 uv)
{
    vec2 xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;
    vec2 yw = clamp(uv.yw, MinMax.yy, MinMax.ww);

    // Re-interleave the two pairs into x, y, z, w order.
    return vec4(xz.x, yw.x, xz.y, yw.y);
}
// WrapType implementation, subroutine index 28.
// Only (x, z) are clamped to [MinMax.x, MinMax.z]; (y, w) pass through.
layout(index = 28) subroutine(WrapType)
vec4 wrapuv_wms2_wmtx(vec4 uv)
{
    vec2 xz = clamp(uv.xz, MinMax.xx, MinMax.zz);

    return vec4(xz.x, uv.y, xz.y, uv.w);
}
// WrapType implementation, subroutine index 29.
// Only (y, w) go through the scale / AND / OR / unscale sequence;
// (x, z) pass through.
layout(index = 29) subroutine(WrapType)
vec4 wrapuv_wmsx_wmt3(vec4 uv)
{
    vec2 yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;

    return vec4(uv.x, yw.x, uv.z, yw.y);
}
// WrapType implementation, subroutine index 30.
// Only (x, z) go through the scale / AND / OR / unscale sequence;
// (y, w) pass through.
layout(index = 30) subroutine(WrapType)
vec4 wrapuv_wms3_wmtx(vec4 uv)
{
    vec2 xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;

    return vec4(xz.x, uv.y, xz.y, uv.w);
}
// WrapType implementation, subroutine index 31.
// Only (y, w) are clamped to [MinMax.y, MinMax.w]; (x, z) pass through.
layout(index = 31) subroutine(WrapType)
vec4 wrapuv_wmsx_wmt2(vec4 uv)
{
    vec2 yw = clamp(uv.yw, MinMax.yy, MinMax.ww);

    return vec4(uv.x, yw.x, uv.z, yw.y);
}
// WrapType implementation, subroutine index 32.
// Identity: returns the coordinates unchanged.
layout(index = 32) subroutine(WrapType)
vec4 wrapuv_dummy(vec4 uv)
{
return uv;
}
#else
vec4 wrapuv(vec4 uv)
{
vec4 uv_out = uv;
@ -405,7 +526,45 @@ vec4 wrapuv(vec4 uv)
return uv_out;
}
#endif
// FIXME: the subroutine variant below crashes the NVIDIA driver, so it is compiled out (#if 0)
#if 0
// Function pointer type
subroutine vec2 ClampType(vec2 uv);
// a function pointer variable
layout(location = 3) subroutine uniform ClampType clampuv;
// ClampType implementation, subroutine index 20.
// Clamps both components: lower bound MinF, upper bound MinMax.zw.
layout(index = 20) subroutine(ClampType)
vec2 clampuv_wms2_wmt2(vec2 uv)
{
return clamp(uv, MinF, MinMax.zw);
}
// ClampType implementation, subroutine index 21.
// Clamps only x to [MinF.x, MinMax.z]; y passes through.
layout(index = 21) subroutine(ClampType)
vec2 clampuv_wms2(vec2 uv)
{
    return vec2(clamp(uv.x, MinF.x, MinMax.z), uv.y);
}
// ClampType implementation, subroutine index 22.
// Clamps only y to [MinF.y, MinMax.w]; x passes through.
layout(index = 22) subroutine(ClampType)
vec2 clampuv_wmt2(vec2 uv)
{
    return vec2(uv.x, clamp(uv.y, MinF.y, MinMax.w));
}
// ClampType implementation, subroutine index 23.
// Identity: returns the coordinates unchanged.
layout(index = 23) subroutine(ClampType)
vec2 clampuv_dummy(vec2 uv)
{
return uv;
}
#else
vec2 clampuv(vec2 uv)
{
vec2 uv_out = uv;
@ -425,6 +584,7 @@ vec2 clampuv(vec2 uv)
return uv_out;
}
#endif
mat4 sample_4c(vec4 uv)
{
@ -533,6 +693,86 @@ vec4 sample_color(vec2 st, float q)
return t;
}
#ifdef SUBROUTINE_GL40
// Function pointer type
subroutine vec4 TfxType(vec4 t, vec4 c);
// a function pointer variable
layout(location = 2) subroutine uniform TfxType tfx;
// TfxType implementation, subroutine index 11.
// RGB = c.rgb * t.rgb * 255/128; alpha is taken unchanged from c.
layout(index = 11) subroutine(TfxType)
vec4 tfx_0_tcc_0(vec4 t, vec4 c)
{
    return vec4(c.rgb * t.rgb * 255.0f / 128.0f, c.a);
}
// TfxType implementation, subroutine index 12.
// RGB is taken from t; alpha is taken unchanged from c.
layout(index = 12) subroutine(TfxType)
vec4 tfx_1_tcc_0(vec4 t, vec4 c)
{
    return vec4(t.rgb, c.a);
}
// TfxType implementation, subroutine index 13.
// RGB = c.rgb * t.rgb * 255/128 + c.a (c.a added to every channel);
// alpha is taken unchanged from c.
layout(index = 13) subroutine(TfxType)
vec4 tfx_2_tcc_0(vec4 t, vec4 c)
{
    vec3 rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;

    return vec4(rgb, c.a);
}
// TfxType implementation, subroutine index 14.
// RGB = c.rgb * t.rgb * 255/128 + c.a (c.a added to every channel);
// alpha is taken unchanged from c.
layout(index = 14) subroutine(TfxType)
vec4 tfx_3_tcc_0(vec4 t, vec4 c)
{
    vec3 rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;

    return vec4(rgb, c.a);
}
// TfxType implementation, subroutine index 15.
// All four channels, alpha included: c * t * 255/128.
layout(index = 15) subroutine(TfxType)
vec4 tfx_0_tcc_1(vec4 t, vec4 c)
{
    return c * t * 255.0f / 128.0f;
}
// TfxType implementation, subroutine index 16.
// Returns t for all four channels; c is ignored.
layout(index = 16) subroutine(TfxType)
vec4 tfx_1_tcc_1(vec4 t, vec4 c)
{
    return t;
}
// TfxType implementation, subroutine index 17.
// RGB = c.rgb * t.rgb * 255/128 + c.a; alpha = c.a + t.a.
layout(index = 17) subroutine(TfxType)
vec4 tfx_2_tcc_1(vec4 t, vec4 c)
{
    vec3 rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
    float alpha = c.a + t.a;

    return vec4(rgb, alpha);
}
// TfxType implementation, subroutine index 18.
// RGB = c.rgb * t.rgb * 255/128 + c.a; alpha is replaced by t.a.
layout(index = 18) subroutine(TfxType)
vec4 tfx_3_tcc_1(vec4 t, vec4 c)
{
    vec3 rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;

    return vec4(rgb, t.a);
}
// TfxType implementation, subroutine index 19.
// Identity: returns c unchanged; t is ignored.
layout(index = 19) subroutine(TfxType)
vec4 tfx_dummy(vec4 t, vec4 c)
{
return c;
}
#else
vec4 tfx(vec4 t, vec4 c)
{
vec4 c_out = c;
@ -577,8 +817,10 @@ vec4 tfx(vec4 t, vec4 c)
}
}
return clamp(c_out, vec4(0.0f, 0.0f, 0.0f, 0.0f), vec4(1.0f, 1.0f, 1.0f, 1.0f));
return c_out;
}
#endif
#if 0
void datst()
@ -595,7 +837,6 @@ void datst()
}
#endif
// Note layout stuff might require gl4.3
#ifdef SUBROUTINE_GL40
// Function pointer type
subroutine void AlphaTestType(vec4 c);
@ -603,7 +844,6 @@ subroutine void AlphaTestType(vec4 c);
// a function pointer variable
layout(location = 0) subroutine uniform AlphaTestType atst;
// The function attached to AlphaTestType
layout(index = 0) subroutine(AlphaTestType)
void atest_never(vec4 c)
{
@ -774,10 +1014,12 @@ vec4 ps_color()
{
vec4 t = sample_color(PSin_t.xy, PSin_t.w);
vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f);
vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f);
#if PS_IIP == 1
vec4 c = tfx(t, PSin_c);
vec4 c = clamp(tfx(t, PSin_c), zero, one);
#else
vec4 c = tfx(t, PSin_fc);
vec4 c = clamp(tfx(t, PSin_fc), zero, one);
#endif
atst(c);