gsdx ogl:

* Separate state creation and shader compilation into separate functions
* Replace various hash_maps with plain arrays
* Compact VertexScale and VertexOffset into a single vec4
* Add the new option "ogl_vertex_subdata": subdata is faster on FGLRX; tests are welcome on Nvidia drivers
    0 => use map/unmap
    1 => use subdata

replay: add the "linux_replay" option (number of replay runs) and compute some nice statistics (mean, standard deviation)

cmake: recreate shader header at build time


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5682 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2013-06-26 20:09:07 +00:00
parent 9cd463e4f8
commit ca1edbf2cb
10 changed files with 308 additions and 321 deletions

View File

@ -3,9 +3,11 @@
use strict;
use warnings;
use File::Spec;
use File::Basename;
use Cwd 'abs_path';
my @res = qw/convert interlace merge shadeboost tfx/;
my $path = File::Spec->catdir("plugins", "GSdx", "res");
my $path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res");
foreach my $r (@res) {
glsl2h($path, $r, "glsl");

View File

@ -177,8 +177,20 @@ set(GSdxHeaders
xbyak/xbyak_util.h
)
# Generated GLSL shader headers.
# They are appended to the header list built above: a second set() on the
# same variable would discard every header that was already listed.
list(APPEND GSdxHeaders
res/convert.h
res/fxaa.h
res/interlace.h
res/merge.h
res/shadeboost.h
res/tfx.h
)
include_directories(.)
# Generate Glsl header file
# The OUTPUT names must match the files the generator script really writes;
# its resource list spells the shade-boost shader "shadeboost".
add_custom_command(OUTPUT res/convert.h res/fxaa.h res/interlace.h res/merge.h res/shadeboost.h res/tfx.h COMMAND perl ${PROJECT_SOURCE_DIR}/linux_various/glsl2h.pl)
add_library(${Output} SHARED ${GSdxSources} ${GSdxHeaders})
target_link_libraries(${Output} ${X11_LIBRARIES})

View File

@ -1424,6 +1424,9 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
return;
}
vector<float> stats;
stats.clear();
if(FILE* fp = fopen(lpszCmdLine, "rb"))
{
//Console console("GSdx", true);
@ -1522,11 +1525,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
//while(IsWindowVisible(hWnd))
//FIXME map?
int finished = 2;
int finished = theApp.GetConfig("linux_replay", 1);
unsigned long frame_number = 0;
while(finished > 0)
{
frame_number = 0;
unsigned long start = timeGetTime();
unsigned long frame_number = 0;
for(auto i = packets.begin(); i != packets.end(); i++)
{
Packet* p = *i;
@ -1571,10 +1575,30 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
fprintf(stderr, "The %ld frames of the scene was render on %ldms\n", frame_number, end - start);
fprintf(stderr, "A means of %fms by frame\n", (float)(end - start)/(float)frame_number);
stats.push_back((float)(end - start));
sleep(1);
finished--;
}
// Print some nice stats
float n = (float)theApp.GetConfig("linux_replay", 1);
float mean = 0;
float sd = 0;
for (auto i = stats.begin(); i != stats.end(); i++) {
mean += *i;
}
mean = mean/n;
for (auto i = stats.begin(); i != stats.end(); i++) {
sd += pow((*i)-mean, 2);
}
sd = sqrt(sd/n);
fprintf(stderr, "\n\nMean: %fms\n", mean);
fprintf(stderr, "Standard deviation: %fms\n", sd);
fprintf(stderr, "Mean by frame: %fms (%ffps)\n", mean/(float)frame_number, 1000.0f*frame_number/mean);
fprintf(stderr, "Standard deviatin by frame: %fms\n", sd/(float)frame_number);
for(auto i = packets.begin(); i != packets.end(); i++)
{
@ -1589,6 +1613,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
GSshutdown();
fclose(fp);
} else {
fprintf(stderr, "failed to open %s\n", lpszCmdLine);
}
}
#endif

View File

@ -58,6 +58,7 @@ GSDeviceOGL::GSDeviceOGL()
, m_vb_sr(NULL)
{
m_msaa = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_MSAA", 0) : 0;
m_debug_shader = !!theApp.GetConfig("debug_ogl_shader", 1);
memset(&m_merge_obj, 0, sizeof(m_merge_obj));
memset(&m_interlace, 0, sizeof(m_interlace));
@ -134,24 +135,22 @@ GSDeviceOGL::~GSDeviceOGL()
delete m_vb;
if (GLLoader::found_GL_ARB_separate_shader_objects) {
for (auto it = m_vs.begin(); it != m_vs.end() ; it++) gl_DeleteProgram(it->second);
for (auto it = m_gs.begin(); it != m_gs.end() ; it++) gl_DeleteProgram(it->second);
for (uint32 key = 0; key < VSSelector::size(); key++) gl_DeleteProgram(m_vs[key]);
for (uint32 key = 0; key < GSSelector::size(); key++) gl_DeleteProgram(m_gs[key]);
for (auto it = m_ps.begin(); it != m_ps.end() ; it++) gl_DeleteProgram(it->second);
} else {
for (auto it = m_vs.begin(); it != m_vs.end() ; it++) gl_DeleteShader(it->second);
for (auto it = m_gs.begin(); it != m_gs.end() ; it++) gl_DeleteShader(it->second);
for (uint32 key = 0; key < VSSelector::size(); key++) gl_DeleteShader(m_vs[key]);
for (uint32 key = 0; key < GSSelector::size(); key++) gl_DeleteShader(m_gs[key]);
for (auto it = m_ps.begin(); it != m_ps.end() ; it++) gl_DeleteShader(it->second);
for (auto it = m_single_prog.begin(); it != m_single_prog.end() ; it++) gl_DeleteProgram(it->second);
m_single_prog.clear();
}
for (auto it = m_ps_ss.begin(); it != m_ps_ss.end() ; it++) gl_DeleteSamplers(1, &it->second);
m_vs.clear();
m_gs.clear();
gl_DeleteSamplers(PSSamplerSelector::size(), m_ps_ss);
for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) delete m_om_dss[key];
m_ps.clear();
m_ps_ss.clear();
m_om_dss.clear();
m_om_bs.clear();
}
@ -248,8 +247,8 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
hr = m_dev->CreateBlendState(&bsd, &m_convert.bs);
#endif
CreateSampler(m_convert.ln, true, false, false);
CreateSampler(m_convert.pt, false, false, false);
m_convert.ln = CreateSampler(true, false, false);
m_convert.pt = CreateSampler(false, false, false);
m_convert.dss = new GSDepthStencilOGL();
m_convert.bs = new GSBlendStateOGL();
@ -625,8 +624,9 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
glEnable(GL_SCISSOR_TEST);
}
void GSDeviceOGL::CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool tav)
GLuint GSDeviceOGL::CreateSampler(bool bilinear, bool tau, bool tav)
{
GLuint sampler;
gl_GenSamplers(1, &sampler);
if (bilinear) {
gl_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@ -657,6 +657,8 @@ void GSDeviceOGL::CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool t
gl_SamplerParameteri(sampler, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
gl_SamplerParameteri(sampler, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
// FIXME: need ogl extension sd.MaxAnisotropy = 16;
return sampler;
}
GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format)
@ -1363,7 +1365,7 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st
free(header_str);
free(sources_array);
if (theApp.GetConfig("debug_ogl_shader", 1) == 1) {
if (m_debug_shader) {
GLint log_length = 0;
GLint status = false;
if (GLLoader::found_GL_ARB_separate_shader_objects) {

View File

@ -240,14 +240,12 @@ class GSDeviceOGL : public GSDevice
public:
__aligned(struct, 32) VSConstantBuffer
{
GSVector4 VertexScale;
GSVector4 VertexOffset;
GSVector4 Vertex_Scale_Offset;
GSVector4 TextureScale;
VSConstantBuffer()
{
VertexScale = GSVector4::zero();
VertexOffset = GSVector4::zero();
Vertex_Scale_Offset = GSVector4::zero();
TextureScale = GSVector4::zero();
}
@ -258,13 +256,11 @@ class GSDeviceOGL : public GSDevice
GSVector4i b0 = b[0];
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue())
if(!((a[0] == b0) & (a[1] == b1)).alltrue())
{
a[0] = b0;
a[1] = b1;
a[2] = b2;
return true;
}
@ -283,7 +279,6 @@ class GSDeviceOGL : public GSDevice
uint32 tme:1;
uint32 fst:1;
uint32 logz:1;
//uint32 rtcopy:1;
};
uint32 key;
@ -292,6 +287,9 @@ class GSDeviceOGL : public GSDevice
operator uint32() {return key & 0x3f;}
VSSelector() : key(0) {}
VSSelector(uint32 k) : key(k) {}
static uint32 size() { return 1 << 5; }
};
__aligned(struct, 32) PSConstantBuffer
@ -327,7 +325,8 @@ class GSDeviceOGL : public GSDevice
GSVector4i b4 = b[4];
GSVector4i b5 = b[5];
if(!((a[0] == b0) /*& (a[1] == b1)*/ & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) // if WH matches HalfTexel does too
// if WH matches both HalfTexel and TC_OffsetHack do too
if(!((a[0] == b0) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue())
{
a[0] = b0;
a[1] = b1;
@ -359,6 +358,9 @@ class GSDeviceOGL : public GSDevice
operator uint32() {return key & 0x7;}
GSSelector() : key(0) {}
GSSelector(uint32 k) : key(k) {}
static uint32 size() { return 1 << 3; }
};
struct PSSelector
@ -413,6 +415,9 @@ class GSDeviceOGL : public GSDevice
operator uint32() {return key & 0x7;}
PSSamplerSelector() : key(0) {}
PSSamplerSelector(uint32 k) : key(k) {}
static uint32 size() { return 1 << 3; }
};
struct OMDepthStencilSelector
@ -434,6 +439,9 @@ class GSDeviceOGL : public GSDevice
operator uint32() {return key & 0x3f;}
OMDepthStencilSelector() : key(0) {}
OMDepthStencilSelector(uint32 k) : key(k) {}
static uint32 size() { return 1 << 6; }
};
struct OMBlendSelector
@ -490,6 +498,8 @@ class GSDeviceOGL : public GSDevice
GSVertexBufferStateOGL* m_vb; // vb_state for HW renderer
GSVertexBufferStateOGL* m_vb_sr; // vb_state for StretchRect
bool m_debug_shader;
struct {
GLuint ps[2]; // program object
GSUniformBufferOGL* cb; // uniform buffer object
@ -552,11 +562,11 @@ class GSDeviceOGL : public GSDevice
GLenum draw;
} m_state;
hash_map<uint32, GLuint > m_vs;
hash_map<uint32, GLuint > m_gs;
GLuint m_vs[1<<5];
GLuint m_gs[1<<3];
GLuint m_ps_ss[1<<3];
GSDepthStencilOGL* m_om_dss[1<<6];
hash_map<uint32, GLuint > m_ps;
hash_map<uint32, GLuint > m_ps_ss;
hash_map<uint32, GSDepthStencilOGL* > m_om_dss;
hash_map<uint32, GSBlendStateOGL* > m_om_bs;
GLuint m_palette_ss;
@ -603,7 +613,6 @@ class GSDeviceOGL : public GSDevice
void ClearDepth(GSTexture* t, float c);
void ClearStencil(GSTexture* t, uint8 c);
void CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool tav);
GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0);
GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0);
GSTexture* CreateTexture(int w, int h, int format = 0);
@ -648,6 +657,15 @@ class GSDeviceOGL : public GSDevice
void CreateTextureFX();
GLuint CompileVS(VSSelector sel);
GLuint CompileGS(GSSelector sel);
GLuint CompilePS(PSSelector sel);
GLuint CreateSampler(bool bilinear, bool tau, bool tav);
GLuint CreateSampler(PSSamplerSelector sel);
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, uint8 afix);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel);

View File

@ -157,21 +157,23 @@ void GSRendererOGL::SetupIA()
dev->IASetVertexState();
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
{
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
if(UserHacks_WildHack && !isPackedUV_HackFlag)
if(UserHacks_WildHack && !isPackedUV_HackFlag) {
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
{
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
GSVertex* RESTRICT d = (GSVertex*)ptr;
for(unsigned int i = 0; i < m_vertex.next; i++)
{
if(PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF;
}
}
dev->IAUnmapVertexBuffer();
dev->IAUnmapVertexBuffer();
}
} else {
// By default use the common path (in case it can be made faster)
dev->IASetVertexBuffer(m_vertex.buff, m_vertex.next);
}
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
@ -202,7 +204,6 @@ void GSRendererOGL::SetupIA()
dev->IASetPrimitiveTopology(t);
}
void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
@ -213,8 +214,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
//OGL GSTexture* rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
@ -232,32 +231,14 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
GSVertexPT1 vertices[] =
{
#if 0
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
#else
{GSVector4(dst.x, dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
#endif
};
//fprintf(stderr, "DATE A:%fx%f B:%fx%f\n", dst.x, -dst.y, dst.z, -dst.w);
//fprintf(stderr, "DATE SR: %f %f %f %f\n", src.x, src.y, src.z, src.w);
//fprintf(stderr, "DATE offset: %f\n", o.x);
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
//OGL rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
//OGL // I'll use VertexTrace when I consider it more trustworthy
//OGL dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
@ -320,7 +301,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.logz = m_logz ? 1 : 0;
//OGL vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
@ -363,8 +343,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
float ox2 = -1.0f / rtsize.x;
float oy2 = -1.0f / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
@ -374,16 +354,12 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// Note: DX does y *= -1.0
vs_cb.Vertex_Scale_Offset = GSVector4(sx, sy, ox * sx + ox2 + 1, oy * sy + oy2 + 1);
// END of FIXME
// gs
@ -519,7 +495,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
//OGL dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
@ -607,7 +582,5 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->EndScene();
//OGL dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}

View File

@ -33,7 +33,7 @@ void GSDeviceOGL::CreateTextureFX()
m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer));
m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer));
CreateSampler(m_palette_ss, false, false, false);
m_palette_ss = CreateSampler(false, false, false);
GSInputLayoutOGL vert_format[] =
{
@ -56,77 +56,158 @@ void GSDeviceOGL::CreateTextureFX()
// Pre compile all Geometry & Vertex Shader
// It might cost a seconds at startup but it would reduce benchmark pollution
GSDeviceOGL::GSSelector gs_sel;
for (uint32 key = 0; key < (1 << 3); key++) {
gs_sel.key = key;
SetupGS(gs_sel);
for (uint32 key = 0; key < GSSelector::size(); key++)
m_gs[key] = CompileGS(GSSelector(key));
for (uint32 key = 0; key < VSSelector::size(); key++)
m_vs[key] = CompileVS(VSSelector(key));
for (uint32 key = 0; key < PSSamplerSelector::size(); key++)
m_ps_ss[key] = CreateSampler(PSSamplerSelector(key));
for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++)
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
}
// Compile the "vs_main" entry point of tfx.glsl for the given selector.
// Every selector field is handed to the GLSL source as a VS_* #define.
// Returns the GL object name filled in by CompileShaderFromSource.
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
{
	std::string defines;
	defines += format("#define VS_BPPZ %d\n", sel.bppz);
	defines += format("#define VS_LOGZ %d\n", sel.logz);
	defines += format("#define VS_TME %d\n", sel.tme);
	defines += format("#define VS_FST %d\n", sel.fst);

	GLuint shader;
	CompileShaderFromSource("tfx.glsl", "vs_main", GL_VERTEX_SHADER, &shader, tfx_glsl, defines);
	return shader;
}
// Compile the "gs_main" entry point of tfx.glsl for the given selector.
// Returns 0 for selectors that do not take the compile path below; otherwise
// returns the GL object name filled in by CompileShaderFromSource.
GLuint GSDeviceOGL::CompileGS(GSSelector sel)
{
	// Easy case: only these prim/iip combinations are compiled
	const bool wants_gs = sel.prim > 0 && (sel.iip == 0 || sel.prim == 3);
	if (!wants_gs)
		return 0;

	std::string defines;
	defines += format("#define GS_IIP %d\n", sel.iip);
	defines += format("#define GS_PRIM %d\n", sel.prim);

	GLuint shader;
	CompileShaderFromSource("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, &shader, tfx_glsl, defines);
	return shader;
}
// Create the sampler object for a sampler selector by forwarding its
// ltf, tau and tav fields to the three-flag CreateSampler overload.
GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel)
{
	const bool bilinear = sel.ltf;
	const bool tau = sel.tau;
	const bool tav = sel.tav;

	return CreateSampler(bilinear, tau, tav);
}
GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
{
GSDepthStencilOGL* dss = new GSDepthStencilOGL();
if (dssel.date)
{
dss->EnableStencil();
dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? GL_ZERO : GL_KEEP);
}
GSDeviceOGL::VSSelector vs_sel;
for (uint32 key = 0; key < (1 << 5); key++) {
vs_sel.key = key;
SetupVS(vs_sel, NULL);
if(dssel.ztst != ZTST_ALWAYS || dssel.zwe)
{
static const GLenum ztst[] =
{
GL_NEVER,
GL_ALWAYS,
GL_GEQUAL,
GL_GREATER
};
dss->EnableDepth();
dss->SetDepth(ztst[dssel.ztst], dssel.zwe);
}
// Use sane reset value
GSSetShader(0);
VSSetShader(0);
return dss;
}
// Build a new blend state object for the given blend selector.
// afix is only used here to format the diagnostic printed for table entries
// flagged as bogus. The caller receives the newly allocated object.
GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
{
	GSBlendStateOGL* state = new GSBlendStateOGL();

	if(bsel.abe)
	{
		// a, b, c and d act as the base-3 digits of the blend table index
		const int idx = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
		const auto& entry = m_blendMapD3D9[idx];

		state->EnableBlend();
		state->SetRGB(entry.op, entry.src, entry.dst);

		if(entry.bogus == 1)
		{
			// Bogus entry: override one factor with GL_ONE, chosen by bsel.a
			if (bsel.a != 0)
				state->SetRGB(entry.op, entry.src, GL_ONE);
			else
				state->SetRGB(entry.op, GL_ONE, entry.dst);

			const std::string afix_text = format("%d >> 7", afix);
			const char *color_names[3] = {"Cs", "Cd", "0"};
			const char *alpha_names[3] = {"As", "Ad", afix_text.c_str()};

			// FIXME, need to investigate OGL capabilities. Maybe for OGL5 ;)
			fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", color_names[bsel.a], color_names[bsel.b], alpha_names[bsel.c], color_names[bsel.d]);
		}

		// Not very good but I don't wanna write another 81 row table
		if(bsel.negative)
		{
			state->RevertOp();
		}
	}

	state->SetMask(bsel.wr, bsel.wg, bsel.wb, bsel.wa);

	return state;
}
// Compile the "ps_main" entry point of tfx.glsl for the given selector.
// Every selector field is handed to the GLSL source as a PS_* #define.
// Returns the GL object name filled in by CompileShaderFromSource.
GLuint GSDeviceOGL::CompilePS(PSSelector sel)
{
	std::string defines;
	defines += format("#define PS_FST %d\n", sel.fst);
	defines += format("#define PS_WMS %d\n", sel.wms);
	defines += format("#define PS_WMT %d\n", sel.wmt);
	defines += format("#define PS_FMT %d\n", sel.fmt);
	defines += format("#define PS_AEM %d\n", sel.aem);
	defines += format("#define PS_TFX %d\n", sel.tfx);
	defines += format("#define PS_TCC %d\n", sel.tcc);
	defines += format("#define PS_ATST %d\n", sel.atst);
	defines += format("#define PS_FOG %d\n", sel.fog);
	defines += format("#define PS_CLR1 %d\n", sel.clr1);
	defines += format("#define PS_FBA %d\n", sel.fba);
	defines += format("#define PS_AOUT %d\n", sel.aout);
	defines += format("#define PS_LTF %d\n", sel.ltf);
	defines += format("#define PS_COLCLIP %d\n", sel.colclip);
	defines += format("#define PS_DATE %d\n", sel.date);
	defines += format("#define PS_SPRITEHACK %d\n", sel.spritehack);
	defines += format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack);
	defines += format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler);

	GLuint shader;
	CompileShaderFromSource("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, &shader, tfx_glsl, defines);
	return shader;
}
void GSDeviceOGL::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
{
// *************************************************************
// Static
// *************************************************************
auto i = m_vs.find(sel);
GLuint vs = m_vs[sel];
if(i == m_vs.end())
{
std::string macro = format("#define VS_BPPZ %d\n", sel.bppz)
+ format("#define VS_LOGZ %d\n", sel.logz)
+ format("#define VS_TME %d\n", sel.tme)
+ format("#define VS_FST %d\n", sel.fst);
GLuint vs;
CompileShaderFromSource("tfx.glsl", "vs_main", GL_VERTEX_SHADER, &vs, tfx_glsl, macro);
m_vs[sel] = vs;
i = m_vs.find(sel);
}
// *************************************************************
// Dynamic
// *************************************************************
if(cb != NULL && m_vs_cb_cache.Update(cb)) {
if(m_vs_cb_cache.Update(cb)) {
SetUniformBuffer(m_vs_cb);
m_vs_cb->upload(cb);
}
VSSetShader(i->second);
VSSetShader(vs);
}
void GSDeviceOGL::SetupGS(GSSelector sel)
{
// *************************************************************
// Static
// *************************************************************
GLuint gs = 0;
if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3))
{
auto i = m_gs.find(sel);
GLuint gs = m_gs[sel];
if(i == m_gs.end()) {
std::string macro = format("#define GS_IIP %d\n", sel.iip)
+ format("#define GS_PRIM %d\n", sel.prim);
CompileShaderFromSource("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, &gs, tfx_glsl, macro);
m_gs[sel] = gs;
} else {
gs = i->second;
}
}
// *************************************************************
// Dynamic
// *************************************************************
GSSetShader(gs);
}
@ -138,29 +219,8 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS
GLuint ps;
auto i = m_ps.find(sel);
if (i == m_ps.end())
{
std::string macro = format("#define PS_FST %d\n", sel.fst)
+ format("#define PS_WMS %d\n", sel.wms)
+ format("#define PS_WMT %d\n", sel.wmt)
+ format("#define PS_FMT %d\n", sel.fmt)
+ format("#define PS_AEM %d\n", sel.aem)
+ format("#define PS_TFX %d\n", sel.tfx)
+ format("#define PS_TCC %d\n", sel.tcc)
+ format("#define PS_ATST %d\n", sel.atst)
+ format("#define PS_FOG %d\n", sel.fog)
+ format("#define PS_CLR1 %d\n", sel.clr1)
+ format("#define PS_FBA %d\n", sel.fba)
+ format("#define PS_AOUT %d\n", sel.aout)
+ format("#define PS_LTF %d\n", sel.ltf)
+ format("#define PS_COLCLIP %d\n", sel.colclip)
+ format("#define PS_DATE %d\n", sel.date)
+ format("#define PS_SPRITEHACK %d\n", sel.spritehack)
+ format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack)
+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler);
CompileShaderFromSource("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, &ps, tfx_glsl, macro);
if (i == m_ps.end()) {
ps = CompilePS(sel);
m_ps[sel] = ps;
} else {
ps = i->second;
@ -183,21 +243,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS
ssel.ltf = 0;
}
auto i = m_ps_ss.find(ssel);
if(i != m_ps_ss.end())
{
ss0 = i->second;
}
else
{
// *************************************************************
// Static
// *************************************************************
CreateSampler(ss0, ssel.ltf, ssel.tau, ssel.tav);
m_ps_ss[ssel] = ss0;
}
ss0 = m_ps_ss[ssel];
if(sel.fmt >= 3)
{
@ -211,86 +257,26 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS
void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
{
auto i = m_om_dss.find(dssel);
GSDepthStencilOGL* dss = m_om_dss[dssel];
// *************************************************************
// Static
// *************************************************************
if (i == m_om_dss.end())
{
GSDepthStencilOGL* dss = new GSDepthStencilOGL();
if (dssel.date)
{
dss->EnableStencil();
dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? GL_ZERO : GL_KEEP);
}
if(dssel.ztst != ZTST_ALWAYS || dssel.zwe)
{
static const GLenum ztst[] =
{
GL_NEVER,
GL_ALWAYS,
GL_GEQUAL,
GL_GREATER
};
dss->EnableDepth();
dss->SetDepth(ztst[dssel.ztst], dssel.zwe);
}
m_om_dss[dssel] = dss;
i = m_om_dss.find(dssel);
}
// *************************************************************
// Dynamic
// *************************************************************
OMSetDepthStencilState(i->second, 1);
OMSetDepthStencilState(dss, 1);
// *************************************************************
// Static
// *************************************************************
auto j = m_om_bs.find(bsel);
GSBlendStateOGL* bs;
if(j == m_om_bs.end())
{
GSBlendStateOGL* bs = new GSBlendStateOGL();
if(bsel.abe)
{
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
bs->EnableBlend();
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
if(m_blendMapD3D9[i].bogus == 1)
{
if (bsel.a == 0)
bs->SetRGB(m_blendMapD3D9[i].op, GL_ONE, m_blendMapD3D9[i].dst);
else
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE);
const string afixstr = format("%d >> 7", afix);
const char *col[3] = {"Cs", "Cd", "0"};
const char *alpha[3] = {"As", "Ad", afixstr.c_str()};
// FIXME, need to investigate OGL capabilities. Maybe for OGL5 ;)
fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]);
}
// Not very good but I don't wanna write another 81 row table
if(bsel.negative) bs->RevertOp();
}
bs->SetMask(bsel.wr, bsel.wg, bsel.wb, bsel.wa);
bs = CreateBlend(bsel, afix);
m_om_bs[bsel] = bs;
j = m_om_bs.find(bsel);
} else {
bs = j->second;
}
// *************************************************************
// Dynamic
// *************************************************************
OMSetBlendState(j->second, (float)(int)afix / 0x80);
OMSetBlendState(bs, (float)(int)afix / 0x80);
}

View File

@ -31,13 +31,13 @@ struct GSInputLayoutOGL {
};
class GSBufferOGL {
size_t m_stride;
const size_t m_stride;
size_t m_start;
size_t m_count;
size_t m_limit;
GLenum m_target;
const GLenum m_target;
GLuint m_buffer;
size_t m_default_size;
const bool m_sub_data_config;
public:
GSBufferOGL(GLenum target, size_t stride) :
@ -46,15 +46,16 @@ class GSBufferOGL {
, m_count(0)
, m_limit(0)
, m_target(target)
, m_sub_data_config((bool)theApp.GetConfig("ogl_vertex_subdata", 1))
{
gl_GenBuffers(1, &m_buffer);
// Opengl works best with 1-4MB buffer.
m_default_size = 2 * 1024 * 1024 / m_stride;
m_limit = 2 * 1024 * 1024 / m_stride;
}
~GSBufferOGL() { gl_DeleteBuffers(1, &m_buffer); }
void allocate() { allocate(m_default_size); }
void allocate() { allocate(m_limit); }
void allocate(size_t new_limit)
{
@ -68,9 +69,26 @@ class GSBufferOGL {
gl_BindBuffer(m_target, m_buffer);
}
void upload(const void* src, uint32 count)
// Upload `count` elements from `src` with gl_BufferSubData, written at
// element offset m_start. The buffer is reallocated first when the data
// does not fit.
void subdata_upload(const void* src, uint32 count)
{
	m_count = count;

	const bool buffer_too_small = m_count > m_limit;
	if (buffer_too_small) {
		// Current GPU buffer is really too small need to allocate a new one
		allocate(std::max<int>(m_count * 3 / 2, m_limit));
	} else {
		const bool no_room_left = m_count > (m_limit - m_start);
		if (no_room_left) {
			// Not enough left free room. Just go back at the beginning
			m_start = 0;
			// Orphan the buffer to avoid synchronization
			allocate(m_limit);
		}
	}

	gl_BufferSubData(m_target, m_stride * m_start, m_stride * m_count, src);
}
void map_upload(const void* src, uint32 count)
{
// Upload the data to the buffer
void* dst;
if (Map(&dst, count)) {
// FIXME which one to use
@ -80,14 +98,16 @@ class GSBufferOGL {
}
}
// Upload `count` elements from `src`, choosing the strategy from
// m_sub_data_config: subdata_upload when it is set, map_upload otherwise.
void upload(const void* src, uint32 count)
{
	if (!m_sub_data_config) {
		map_upload(src, count);
		return;
	}

	subdata_upload(src, count);
}
bool Map(void** pointer, uint32 count ) {
#ifdef ENABLE_OGL_DEBUG
GLint b_size = -1;
gl_GetBufferParameteriv(m_target, GL_BUFFER_SIZE, &b_size);
if (b_size <= 0) return false;
#endif
m_count = count;
// Note: For an explanation of the map flag
@ -96,7 +116,7 @@ class GSBufferOGL {
// Current GPU buffer is really too small need to allocate a new one
if (m_count > m_limit) {
allocate(std::max<int>(m_count * 3 / 2, m_default_size));
allocate(std::max<int>(m_count * 3 / 2, m_limit));
} else if (m_count > (m_limit - m_start) ) {
// Not enough left free room. Just go back at the beginning
@ -113,13 +133,7 @@ class GSBufferOGL {
// Upload the data to the buffer
*pointer = (uint8*) gl_MapBufferRange(m_target, m_stride*m_start, m_stride*m_count, map_flags);
//fprintf(stderr, "Map %x from %d to %d\n", *pointer, m_start, m_start+m_count);
#ifdef ENABLE_OGL_DEBUG
if (*pointer == NULL) {
fprintf(stderr, "CRITICAL ERROR map failed for vb!!!\n");
return false;
}
#endif
return true;
}

View File

@ -46,7 +46,6 @@
struct vertex
{
//vec4 p;
vec4 t;
vec4 tp;
vec4 c;
@ -69,17 +68,14 @@ layout(location = 0) out vertex VSout;
#define VSout_c (VSout.c)
#else
#ifdef DISABLE_SSO
//out vec4 SHADERp;
out vec4 SHADERt;
out vec4 SHADERtp;
out vec4 SHADERc;
#else
//layout(location = 0) out vec4 SHADERp;
layout(location = 0) out vec4 SHADERt;
layout(location = 1) out vec4 SHADERtp;
layout(location = 2) out vec4 SHADERc;
#endif
//#define VSout_p SHADERp
#define VSout_t SHADERt
#define VSout_tp SHADERtp
#define VSout_c SHADERc
@ -99,11 +95,13 @@ layout(std140) uniform cb20
layout(std140, binding = 20) uniform cb20
#endif
{
vec4 VertexScale;
vec4 VertexOffset;
vec2 VertexScale;
vec2 VertexOffset;
vec2 TextureScale;
};
const float exp_min32 = exp2(-32);
void vs_main()
{
uint z;
@ -119,35 +117,25 @@ void vs_main()
// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0);
vec4 final_p = p * VertexScale - VertexOffset;
// FIXME
// FLIP vertically
final_p.y *= -1.0f;
vec3 p = vec3(i_p, z) - vec3(0.05f, 0.05f, 0.0f);
p = p * vec3(VertexScale, exp_min32) - vec3(VertexOffset, 0.0f);
if(VS_LOGZ == 1)
{
final_p.z = log2(1.0f + float(z)) / 32.0f;
p.z = log2(1.0f + float(z)) / 32.0f;
}
//VSout_p = final_p;
gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
#if VS_RTCOPY
VSout_tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;
#endif
gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
if(VS_TME != 0)
{
if(VS_FST != 0)
{
//VSout_t.xy = i_t * TextureScale;
VSout_t.xy = i_uv * TextureScale;
VSout_t.w = 1.0f;
}
else
{
//VSout_t.xy = i_t;
VSout_t.xy = i_st;
VSout_t.w = i_q;
}
@ -188,7 +176,7 @@ layout(points, max_vertices = 1) out;
void gs_main()
{
for(int i = 0; i < gl_in.length(); i++) {
gl_Position = gl_in[i].gl_Position; // FIXME is it useful
gl_Position = gl_in[i].gl_Position;
GSout = GSin[i];
EmitVertex();
}
@ -202,7 +190,7 @@ layout(line_strip, max_vertices = 2) out;
void gs_main()
{
for(int i = 0; i < gl_in.length(); i++) {
gl_Position = gl_in[i].gl_Position; // FIXME is it useful
gl_Position = gl_in[i].gl_Position;
GSout = GSin[i];
#if GS_IIP == 0
if (i == 0)
@ -220,7 +208,7 @@ layout(triangle_strip, max_vertices = 3) out;
void gs_main()
{
for(int i = 0; i < gl_in.length(); i++) {
gl_Position = gl_in[i].gl_Position; // FIXME is it useful
gl_Position = gl_in[i].gl_Position;
GSout = GSin[i];
#if GS_IIP == 0
if (i == 0 || i == 1)
@ -299,23 +287,19 @@ void gs_main()
#ifdef FRAGMENT_SHADER
#if __VERSION__ > 140 && !(defined(NO_STRUCT))
layout(location = 0) in vertex PSin;
//#define PSin_p (PSin.p)
#define PSin_t (PSin.t)
#define PSin_tp (PSin.tp)
#define PSin_c (PSin.c)
#else
#ifdef DISABLE_SSO
in vec4 SHADERp;
in vec4 SHADERt;
in vec4 SHADERtp;
in vec4 SHADERc;
#else
//layout(location = 0) in vec4 SHADERp;
layout(location = 0) in vec4 SHADERt;
layout(location = 1) in vec4 SHADERtp;
layout(location = 2) in vec4 SHADERc;
#endif
//#define PSin_p SHADERp
#define PSin_t SHADERt
#define PSin_tp SHADERtp
#define PSin_c SHADERc
@ -365,10 +349,7 @@ vec4 sample_c(vec2 uv)
uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
}
// FIXME I'm not sure it is a good solution to flip texture
return texture(TextureSampler, uv);
//FIXME another way to FLIP vertically
//return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );
}
vec4 sample_p(float u)
@ -698,12 +679,8 @@ vec4 ps_color()
void ps_main()
{
//FIXME
vec4 c = ps_color();
// FIXME: I'm not sure about the value of others field
// output.c1 = c.a * 2; // used for alpha blending
float alpha = c.a * 2;
if(PS_AOUT != 0) // 16 bit output

View File

@ -74,7 +74,6 @@ static const char* tfx_glsl =
"\n"
"struct vertex\n"
"{\n"
" //vec4 p;\n"
" vec4 t;\n"
" vec4 tp;\n"
" vec4 c;\n"
@ -97,17 +96,14 @@ static const char* tfx_glsl =
"#define VSout_c (VSout.c)\n"
"#else\n"
"#ifdef DISABLE_SSO\n"
"//out vec4 SHADERp;\n"
"out vec4 SHADERt;\n"
"out vec4 SHADERtp;\n"
"out vec4 SHADERc;\n"
"#else\n"
"//layout(location = 0) out vec4 SHADERp;\n"
"layout(location = 0) out vec4 SHADERt;\n"
"layout(location = 1) out vec4 SHADERtp;\n"
"layout(location = 2) out vec4 SHADERc;\n"
"#endif\n"
"//#define VSout_p SHADERp\n"
"#define VSout_t SHADERt\n"
"#define VSout_tp SHADERtp\n"
"#define VSout_c SHADERc\n"
@ -127,11 +123,13 @@ static const char* tfx_glsl =
"layout(std140, binding = 20) uniform cb20\n"
"#endif\n"
"{\n"
" vec4 VertexScale;\n"
" vec4 VertexOffset;\n"
" vec2 VertexScale;\n"
" vec2 VertexOffset;\n"
" vec2 TextureScale;\n"
"};\n"
"\n"
"const float exp_min32 = exp2(-32);\n"
"\n"
"void vs_main()\n"
"{\n"
" uint z;\n"
@ -147,35 +145,25 @@ static const char* tfx_glsl =
" // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel\n"
" // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133\n"
"\n"
" vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0); \n"
" vec4 final_p = p * VertexScale - VertexOffset;\n"
" // FIXME\n"
" // FLIP vertically\n"
" final_p.y *= -1.0f;\n"
" vec3 p = vec3(i_p, z) - vec3(0.05f, 0.05f, 0.0f);\n"
" p = p * vec3(VertexScale, exp_min32) - vec3(VertexOffset, 0.0f);\n"
"\n"
" if(VS_LOGZ == 1)\n"
" {\n"
" final_p.z = log2(1.0f + float(z)) / 32.0f;\n"
" p.z = log2(1.0f + float(z)) / 32.0f;\n"
" }\n"
"\n"
" //VSout_p = final_p;\n"
" gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n"
"#if VS_RTCOPY\n"
" VSout_tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;\n"
"#endif\n"
"\n"
" gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n"
"\n"
" if(VS_TME != 0)\n"
" {\n"
" if(VS_FST != 0)\n"
" {\n"
" //VSout_t.xy = i_t * TextureScale;\n"
" VSout_t.xy = i_uv * TextureScale;\n"
" VSout_t.w = 1.0f;\n"
" }\n"
" else\n"
" {\n"
" //VSout_t.xy = i_t;\n"
" VSout_t.xy = i_st;\n"
" VSout_t.w = i_q;\n"
" }\n"
@ -216,7 +204,7 @@ static const char* tfx_glsl =
"void gs_main()\n"
"{\n"
" for(int i = 0; i < gl_in.length(); i++) {\n"
" gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n"
" gl_Position = gl_in[i].gl_Position;\n"
" GSout = GSin[i];\n"
" EmitVertex();\n"
" }\n"
@ -230,7 +218,7 @@ static const char* tfx_glsl =
"void gs_main()\n"
"{\n"
" for(int i = 0; i < gl_in.length(); i++) {\n"
" gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n"
" gl_Position = gl_in[i].gl_Position;\n"
" GSout = GSin[i];\n"
"#if GS_IIP == 0\n"
" if (i == 0)\n"
@ -248,7 +236,7 @@ static const char* tfx_glsl =
"void gs_main()\n"
"{\n"
" for(int i = 0; i < gl_in.length(); i++) {\n"
" gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n"
" gl_Position = gl_in[i].gl_Position;\n"
" GSout = GSin[i];\n"
"#if GS_IIP == 0\n"
" if (i == 0 || i == 1)\n"
@ -327,23 +315,19 @@ static const char* tfx_glsl =
"#ifdef FRAGMENT_SHADER\n"
"#if __VERSION__ > 140 && !(defined(NO_STRUCT))\n"
"layout(location = 0) in vertex PSin;\n"
"//#define PSin_p (PSin.p)\n"
"#define PSin_t (PSin.t)\n"
"#define PSin_tp (PSin.tp)\n"
"#define PSin_c (PSin.c)\n"
"#else\n"
"#ifdef DISABLE_SSO\n"
"in vec4 SHADERp;\n"
"in vec4 SHADERt;\n"
"in vec4 SHADERtp;\n"
"in vec4 SHADERc;\n"
"#else\n"
"//layout(location = 0) in vec4 SHADERp;\n"
"layout(location = 0) in vec4 SHADERt;\n"
"layout(location = 1) in vec4 SHADERtp;\n"
"layout(location = 2) in vec4 SHADERc;\n"
"#endif\n"
"//#define PSin_p SHADERp\n"
"#define PSin_t SHADERt\n"
"#define PSin_tp SHADERtp\n"
"#define PSin_c SHADERc\n"
@ -393,10 +377,7 @@ static const char* tfx_glsl =
" uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;\n"
" }\n"
"\n"
" // FIXME I'm not sure it is a good solution to flip texture\n"
" return texture(TextureSampler, uv);\n"
" //FIXME another way to FLIP vertically\n"
" //return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );\n"
"}\n"
"\n"
"vec4 sample_p(float u)\n"
@ -726,12 +707,8 @@ static const char* tfx_glsl =
"\n"
"void ps_main()\n"
"{\n"
" //FIXME\n"
" vec4 c = ps_color();\n"
"\n"
" // FIXME: I'm not sure about the value of others field\n"
" // output.c1 = c.a * 2; // used for alpha blending\n"
"\n"
" float alpha = c.a * 2;\n"
"\n"
" if(PS_AOUT != 0) // 16 bit output\n"