mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl:
* Separate state and shader compilation into separate functions * replace various hash_maps by basic arrays * Compact VertexScale and offset into a single vec4 * add the new option "ogl_vertex_subdata": subdata is faster on FGLRX, tests are welcome on Nvidia drivers 0 => use map/unmap 1 => use subdata replay: add "linux_replay" option and compute some nice stats (mean, standard deviation) cmake: recreate shader headers at build time git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5682 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
9cd463e4f8
commit
ca1edbf2cb
|
@ -3,9 +3,11 @@
|
|||
use strict;
|
||||
use warnings;
|
||||
use File::Spec;
|
||||
use File::Basename;
|
||||
use Cwd 'abs_path';
|
||||
|
||||
my @res = qw/convert interlace merge shadeboost tfx/;
|
||||
my $path = File::Spec->catdir("plugins", "GSdx", "res");
|
||||
my $path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res");
|
||||
|
||||
foreach my $r (@res) {
|
||||
glsl2h($path, $r, "glsl");
|
||||
|
|
|
@ -177,8 +177,20 @@ set(GSdxHeaders
|
|||
xbyak/xbyak_util.h
|
||||
)
|
||||
|
||||
# Headers generated from the GLSL sources by linux_various/glsl2h.pl.
# They are appended so the header list built above is extended, not replaced.
list(APPEND GSdxHeaders
|
||||
res/convert.h
|
||||
res/fxaa.h
|
||||
res/interlace.h
|
||||
res/merge.h
|
||||
res/shadeboost.h
|
||||
res/tfx.h
|
||||
)
|
||||
|
||||
include_directories(.)
|
||||
|
||||
# Generate Glsl header file
# The OUTPUT names must match the file names used in the header list above
# (the script's resource list spells it "shadeboost").
|
||||
add_custom_command(OUTPUT res/convert.h res/fxaa.h res/interlace.h res/merge.h res/shadeboost.h res/tfx.h COMMAND perl ${PROJECT_SOURCE_DIR}/linux_various/glsl2h.pl)
|
||||
|
||||
add_library(${Output} SHARED ${GSdxSources} ${GSdxHeaders})
|
||||
|
||||
target_link_libraries(${Output} ${X11_LIBRARIES})
|
||||
|
|
|
@ -1424,6 +1424,9 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
|
|||
return;
|
||||
}
|
||||
|
||||
vector<float> stats;
|
||||
stats.clear();
|
||||
|
||||
if(FILE* fp = fopen(lpszCmdLine, "rb"))
|
||||
{
|
||||
//Console console("GSdx", true);
|
||||
|
@ -1522,11 +1525,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
|
|||
|
||||
//while(IsWindowVisible(hWnd))
|
||||
//FIXME map?
|
||||
int finished = 2;
|
||||
int finished = theApp.GetConfig("linux_replay", 1);
|
||||
unsigned long frame_number = 0;
|
||||
while(finished > 0)
|
||||
{
|
||||
frame_number = 0;
|
||||
unsigned long start = timeGetTime();
|
||||
unsigned long frame_number = 0;
|
||||
for(auto i = packets.begin(); i != packets.end(); i++)
|
||||
{
|
||||
Packet* p = *i;
|
||||
|
@ -1571,10 +1575,30 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
|
|||
fprintf(stderr, "The %ld frames of the scene was render on %ldms\n", frame_number, end - start);
|
||||
fprintf(stderr, "A means of %fms by frame\n", (float)(end - start)/(float)frame_number);
|
||||
|
||||
stats.push_back((float)(end - start));
|
||||
|
||||
|
||||
sleep(1);
|
||||
finished--;
|
||||
}
|
||||
|
||||
// Print some nice stats
|
||||
float n = (float)theApp.GetConfig("linux_replay", 1);
|
||||
float mean = 0;
|
||||
float sd = 0;
|
||||
for (auto i = stats.begin(); i != stats.end(); i++) {
|
||||
mean += *i;
|
||||
}
|
||||
mean = mean/n;
|
||||
for (auto i = stats.begin(); i != stats.end(); i++) {
|
||||
sd += pow((*i)-mean, 2);
|
||||
}
|
||||
sd = sqrt(sd/n);
|
||||
|
||||
fprintf(stderr, "\n\nMean: %fms\n", mean);
|
||||
fprintf(stderr, "Standard deviation: %fms\n", sd);
|
||||
fprintf(stderr, "Mean by frame: %fms (%ffps)\n", mean/(float)frame_number, 1000.0f*frame_number/mean);
|
||||
fprintf(stderr, "Standard deviatin by frame: %fms\n", sd/(float)frame_number);
|
||||
|
||||
for(auto i = packets.begin(); i != packets.end(); i++)
|
||||
{
|
||||
|
@ -1589,6 +1613,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
|
|||
GSshutdown();
|
||||
|
||||
fclose(fp);
|
||||
} else {
|
||||
fprintf(stderr, "failed to open %s\n", lpszCmdLine);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -58,6 +58,7 @@ GSDeviceOGL::GSDeviceOGL()
|
|||
, m_vb_sr(NULL)
|
||||
{
|
||||
m_msaa = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_MSAA", 0) : 0;
|
||||
m_debug_shader = !!theApp.GetConfig("debug_ogl_shader", 1);
|
||||
|
||||
memset(&m_merge_obj, 0, sizeof(m_merge_obj));
|
||||
memset(&m_interlace, 0, sizeof(m_interlace));
|
||||
|
@ -134,24 +135,22 @@ GSDeviceOGL::~GSDeviceOGL()
|
|||
delete m_vb;
|
||||
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
for (auto it = m_vs.begin(); it != m_vs.end() ; it++) gl_DeleteProgram(it->second);
|
||||
for (auto it = m_gs.begin(); it != m_gs.end() ; it++) gl_DeleteProgram(it->second);
|
||||
for (uint32 key = 0; key < VSSelector::size(); key++) gl_DeleteProgram(m_vs[key]);
|
||||
for (uint32 key = 0; key < GSSelector::size(); key++) gl_DeleteProgram(m_gs[key]);
|
||||
for (auto it = m_ps.begin(); it != m_ps.end() ; it++) gl_DeleteProgram(it->second);
|
||||
} else {
|
||||
for (auto it = m_vs.begin(); it != m_vs.end() ; it++) gl_DeleteShader(it->second);
|
||||
for (auto it = m_gs.begin(); it != m_gs.end() ; it++) gl_DeleteShader(it->second);
|
||||
for (uint32 key = 0; key < VSSelector::size(); key++) gl_DeleteShader(m_vs[key]);
|
||||
for (uint32 key = 0; key < GSSelector::size(); key++) gl_DeleteShader(m_gs[key]);
|
||||
for (auto it = m_ps.begin(); it != m_ps.end() ; it++) gl_DeleteShader(it->second);
|
||||
|
||||
for (auto it = m_single_prog.begin(); it != m_single_prog.end() ; it++) gl_DeleteProgram(it->second);
|
||||
m_single_prog.clear();
|
||||
}
|
||||
|
||||
for (auto it = m_ps_ss.begin(); it != m_ps_ss.end() ; it++) gl_DeleteSamplers(1, &it->second);
|
||||
m_vs.clear();
|
||||
m_gs.clear();
|
||||
gl_DeleteSamplers(PSSamplerSelector::size(), m_ps_ss);
|
||||
|
||||
for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) delete m_om_dss[key];
|
||||
m_ps.clear();
|
||||
m_ps_ss.clear();
|
||||
m_om_dss.clear();
|
||||
m_om_bs.clear();
|
||||
}
|
||||
|
||||
|
@ -248,8 +247,8 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
|
|||
hr = m_dev->CreateBlendState(&bsd, &m_convert.bs);
|
||||
#endif
|
||||
|
||||
CreateSampler(m_convert.ln, true, false, false);
|
||||
CreateSampler(m_convert.pt, false, false, false);
|
||||
m_convert.ln = CreateSampler(true, false, false);
|
||||
m_convert.pt = CreateSampler(false, false, false);
|
||||
|
||||
m_convert.dss = new GSDepthStencilOGL();
|
||||
m_convert.bs = new GSBlendStateOGL();
|
||||
|
@ -625,8 +624,9 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
|
|||
glEnable(GL_SCISSOR_TEST);
|
||||
}
|
||||
|
||||
void GSDeviceOGL::CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool tav)
|
||||
GLuint GSDeviceOGL::CreateSampler(bool bilinear, bool tau, bool tav)
|
||||
{
|
||||
GLuint sampler;
|
||||
gl_GenSamplers(1, &sampler);
|
||||
if (bilinear) {
|
||||
gl_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
|
@ -657,6 +657,8 @@ void GSDeviceOGL::CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool t
|
|||
gl_SamplerParameteri(sampler, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
|
||||
gl_SamplerParameteri(sampler, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
|
||||
// FIXME: need ogl extension sd.MaxAnisotropy = 16;
|
||||
|
||||
return sampler;
|
||||
}
|
||||
|
||||
GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format)
|
||||
|
@ -1363,7 +1365,7 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st
|
|||
free(header_str);
|
||||
free(sources_array);
|
||||
|
||||
if (theApp.GetConfig("debug_ogl_shader", 1) == 1) {
|
||||
if (m_debug_shader) {
|
||||
GLint log_length = 0;
|
||||
GLint status = false;
|
||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||
|
|
|
@ -240,14 +240,12 @@ class GSDeviceOGL : public GSDevice
|
|||
public:
|
||||
__aligned(struct, 32) VSConstantBuffer
|
||||
{
|
||||
GSVector4 VertexScale;
|
||||
GSVector4 VertexOffset;
|
||||
GSVector4 Vertex_Scale_Offset;
|
||||
GSVector4 TextureScale;
|
||||
|
||||
VSConstantBuffer()
|
||||
{
|
||||
VertexScale = GSVector4::zero();
|
||||
VertexOffset = GSVector4::zero();
|
||||
Vertex_Scale_Offset = GSVector4::zero();
|
||||
TextureScale = GSVector4::zero();
|
||||
}
|
||||
|
||||
|
@ -258,13 +256,11 @@ class GSDeviceOGL : public GSDevice
|
|||
|
||||
GSVector4i b0 = b[0];
|
||||
GSVector4i b1 = b[1];
|
||||
GSVector4i b2 = b[2];
|
||||
|
||||
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue())
|
||||
if(!((a[0] == b0) & (a[1] == b1)).alltrue())
|
||||
{
|
||||
a[0] = b0;
|
||||
a[1] = b1;
|
||||
a[2] = b2;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -283,7 +279,6 @@ class GSDeviceOGL : public GSDevice
|
|||
uint32 tme:1;
|
||||
uint32 fst:1;
|
||||
uint32 logz:1;
|
||||
//uint32 rtcopy:1;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
|
@ -292,6 +287,9 @@ class GSDeviceOGL : public GSDevice
|
|||
operator uint32() {return key & 0x3f;}
|
||||
|
||||
VSSelector() : key(0) {}
|
||||
VSSelector(uint32 k) : key(k) {}
|
||||
|
||||
static uint32 size() { return 1 << 5; }
|
||||
};
|
||||
|
||||
__aligned(struct, 32) PSConstantBuffer
|
||||
|
@ -327,7 +325,8 @@ class GSDeviceOGL : public GSDevice
|
|||
GSVector4i b4 = b[4];
|
||||
GSVector4i b5 = b[5];
|
||||
|
||||
if(!((a[0] == b0) /*& (a[1] == b1)*/ & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) // if WH matches HalfTexel does too
|
||||
// if WH matches both HalfTexel and TC_OffsetHack do too
|
||||
if(!((a[0] == b0) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue())
|
||||
{
|
||||
a[0] = b0;
|
||||
a[1] = b1;
|
||||
|
@ -359,6 +358,9 @@ class GSDeviceOGL : public GSDevice
|
|||
operator uint32() {return key & 0x7;}
|
||||
|
||||
GSSelector() : key(0) {}
|
||||
GSSelector(uint32 k) : key(k) {}
|
||||
|
||||
static uint32 size() { return 1 << 3; }
|
||||
};
|
||||
|
||||
struct PSSelector
|
||||
|
@ -413,6 +415,9 @@ class GSDeviceOGL : public GSDevice
|
|||
operator uint32() {return key & 0x7;}
|
||||
|
||||
PSSamplerSelector() : key(0) {}
|
||||
PSSamplerSelector(uint32 k) : key(k) {}
|
||||
|
||||
static uint32 size() { return 1 << 3; }
|
||||
};
|
||||
|
||||
struct OMDepthStencilSelector
|
||||
|
@ -434,6 +439,9 @@ class GSDeviceOGL : public GSDevice
|
|||
operator uint32() {return key & 0x3f;}
|
||||
|
||||
OMDepthStencilSelector() : key(0) {}
|
||||
OMDepthStencilSelector(uint32 k) : key(k) {}
|
||||
|
||||
static uint32 size() { return 1 << 6; }
|
||||
};
|
||||
|
||||
struct OMBlendSelector
|
||||
|
@ -490,6 +498,8 @@ class GSDeviceOGL : public GSDevice
|
|||
GSVertexBufferStateOGL* m_vb; // vb_state for HW renderer
|
||||
GSVertexBufferStateOGL* m_vb_sr; // vb_state for StretchRect
|
||||
|
||||
bool m_debug_shader;
|
||||
|
||||
struct {
|
||||
GLuint ps[2]; // program object
|
||||
GSUniformBufferOGL* cb; // uniform buffer object
|
||||
|
@ -552,11 +562,11 @@ class GSDeviceOGL : public GSDevice
|
|||
GLenum draw;
|
||||
} m_state;
|
||||
|
||||
hash_map<uint32, GLuint > m_vs;
|
||||
hash_map<uint32, GLuint > m_gs;
|
||||
GLuint m_vs[1<<5];
|
||||
GLuint m_gs[1<<3];
|
||||
GLuint m_ps_ss[1<<3];
|
||||
GSDepthStencilOGL* m_om_dss[1<<6];
|
||||
hash_map<uint32, GLuint > m_ps;
|
||||
hash_map<uint32, GLuint > m_ps_ss;
|
||||
hash_map<uint32, GSDepthStencilOGL* > m_om_dss;
|
||||
hash_map<uint32, GSBlendStateOGL* > m_om_bs;
|
||||
|
||||
GLuint m_palette_ss;
|
||||
|
@ -603,7 +613,6 @@ class GSDeviceOGL : public GSDevice
|
|||
void ClearDepth(GSTexture* t, float c);
|
||||
void ClearStencil(GSTexture* t, uint8 c);
|
||||
|
||||
void CreateSampler(GLuint& sampler, bool bilinear, bool tau, bool tav);
|
||||
GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0);
|
||||
GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0);
|
||||
GSTexture* CreateTexture(int w, int h, int format = 0);
|
||||
|
@ -648,6 +657,15 @@ class GSDeviceOGL : public GSDevice
|
|||
|
||||
|
||||
void CreateTextureFX();
|
||||
GLuint CompileVS(VSSelector sel);
|
||||
GLuint CompileGS(GSSelector sel);
|
||||
GLuint CompilePS(PSSelector sel);
|
||||
GLuint CreateSampler(bool bilinear, bool tau, bool tav);
|
||||
GLuint CreateSampler(PSSamplerSelector sel);
|
||||
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
|
||||
GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, uint8 afix);
|
||||
|
||||
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel);
|
||||
|
|
|
@ -157,21 +157,23 @@ void GSRendererOGL::SetupIA()
|
|||
|
||||
dev->IASetVertexState();
|
||||
|
||||
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
|
||||
{
|
||||
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
|
||||
|
||||
if(UserHacks_WildHack && !isPackedUV_HackFlag)
|
||||
if(UserHacks_WildHack && !isPackedUV_HackFlag) {
|
||||
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
|
||||
{
|
||||
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
|
||||
|
||||
GSVertex* RESTRICT d = (GSVertex*)ptr;
|
||||
|
||||
for(unsigned int i = 0; i < m_vertex.next; i++)
|
||||
{
|
||||
if(PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF;
|
||||
}
|
||||
}
|
||||
|
||||
dev->IAUnmapVertexBuffer();
|
||||
dev->IAUnmapVertexBuffer();
|
||||
}
|
||||
} else {
|
||||
// By default use the common path (in case it can be made faster)
|
||||
dev->IASetVertexBuffer(m_vertex.buff, m_vertex.next);
|
||||
}
|
||||
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
|
||||
|
@ -202,7 +204,6 @@ void GSRendererOGL::SetupIA()
|
|||
dev->IASetPrimitiveTopology(t);
|
||||
}
|
||||
|
||||
|
||||
void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
|
@ -213,8 +214,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
|
||||
//OGL GSTexture* rtcopy = NULL;
|
||||
|
||||
ASSERT(m_dev != NULL);
|
||||
|
||||
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
|
||||
|
@ -232,32 +231,14 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
GSVertexPT1 vertices[] =
|
||||
{
|
||||
#if 0
|
||||
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
|
||||
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
|
||||
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
|
||||
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
|
||||
#else
|
||||
{GSVector4(dst.x, dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
|
||||
{GSVector4(dst.z, dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
|
||||
{GSVector4(dst.x, dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
|
||||
{GSVector4(dst.z, dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
|
||||
#endif
|
||||
};
|
||||
//fprintf(stderr, "DATE A:%fx%f B:%fx%f\n", dst.x, -dst.y, dst.z, -dst.w);
|
||||
//fprintf(stderr, "DATE SR: %f %f %f %f\n", src.x, src.y, src.z, src.w);
|
||||
//fprintf(stderr, "DATE offset: %f\n", o.x);
|
||||
|
||||
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
|
||||
}
|
||||
else
|
||||
{
|
||||
//OGL rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
|
||||
|
||||
//OGL // I'll use VertexTrace when I consider it more trustworthy
|
||||
|
||||
//OGL dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -320,7 +301,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
vs_sel.tme = PRIM->TME;
|
||||
vs_sel.fst = PRIM->FST;
|
||||
vs_sel.logz = m_logz ? 1 : 0;
|
||||
//OGL vs_sel.rtcopy = !!rtcopy;
|
||||
|
||||
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
|
||||
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
|
||||
|
@ -363,8 +343,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
|
||||
float ox = (float)(int)context->XYOFFSET.OFX;
|
||||
float oy = (float)(int)context->XYOFFSET.OFY;
|
||||
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
|
||||
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
|
||||
float ox2 = -1.0f / rtsize.x;
|
||||
float oy2 = -1.0f / rtsize.y;
|
||||
|
||||
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
|
||||
//because DX10 and DX9 have a different pixel center.)
|
||||
|
@ -374,16 +354,12 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
if(rt->LikelyOffset)
|
||||
{
|
||||
// DX9 has pixelcenter set to 0.0, so give it some value here
|
||||
|
||||
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
|
||||
|
||||
ox2 *= rt->OffsetHack_modx;
|
||||
oy2 *= rt->OffsetHack_mody;
|
||||
}
|
||||
|
||||
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
|
||||
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
|
||||
// Note: DX does y *= -1.0
|
||||
vs_cb.Vertex_Scale_Offset = GSVector4(sx, sy, ox * sx + ox2 + 1, oy * sy + oy2 + 1);
|
||||
// END of FIXME
|
||||
|
||||
// gs
|
||||
|
@ -519,7 +495,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
dev->OMSetRenderTargets(rt, ds, &scissor);
|
||||
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
|
||||
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
|
||||
//OGL dev->PSSetShaderResource(2, rtcopy);
|
||||
|
||||
uint8 afix = context->ALPHA.FIX;
|
||||
|
||||
|
@ -607,7 +582,5 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
dev->EndScene();
|
||||
|
||||
//OGL dev->Recycle(rtcopy);
|
||||
|
||||
if(om_dssel.fba) UpdateFBA(rt);
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ void GSDeviceOGL::CreateTextureFX()
|
|||
m_vs_cb = new GSUniformBufferOGL(g_vs_cb_index, sizeof(VSConstantBuffer));
|
||||
m_ps_cb = new GSUniformBufferOGL(g_ps_cb_index, sizeof(PSConstantBuffer));
|
||||
|
||||
CreateSampler(m_palette_ss, false, false, false);
|
||||
m_palette_ss = CreateSampler(false, false, false);
|
||||
|
||||
GSInputLayoutOGL vert_format[] =
|
||||
{
|
||||
|
@ -56,77 +56,158 @@ void GSDeviceOGL::CreateTextureFX()
|
|||
|
||||
// Pre compile all Geometry & Vertex Shader
|
||||
// It might cost a seconds at startup but it would reduce benchmark pollution
|
||||
GSDeviceOGL::GSSelector gs_sel;
|
||||
for (uint32 key = 0; key < (1 << 3); key++) {
|
||||
gs_sel.key = key;
|
||||
SetupGS(gs_sel);
|
||||
for (uint32 key = 0; key < GSSelector::size(); key++)
|
||||
m_gs[key] = CompileGS(GSSelector(key));
|
||||
|
||||
for (uint32 key = 0; key < VSSelector::size(); key++)
|
||||
m_vs[key] = CompileVS(VSSelector(key));
|
||||
|
||||
for (uint32 key = 0; key < PSSamplerSelector::size(); key++)
|
||||
m_ps_ss[key] = CreateSampler(PSSamplerSelector(key));
|
||||
|
||||
for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++)
|
||||
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
|
||||
|
||||
}
|
||||
|
||||
// Compiles the vertex-shader variant described by `sel`.
//
// The selector fields bppz, logz, tme and fst are turned into VS_* #define
// lines, which are passed as the macro header when compiling the "vs_main"
// entry point of tfx.glsl.
//
// Returns the GL object name that CompileShaderFromSource wrote into `vs`.
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
|
||||
{
|
||||
GLuint vs;
|
||||
std::string macro = format("#define VS_BPPZ %d\n", sel.bppz)
|
||||
+ format("#define VS_LOGZ %d\n", sel.logz)
|
||||
+ format("#define VS_TME %d\n", sel.tme)
|
||||
+ format("#define VS_FST %d\n", sel.fst);
|
||||
|
||||
CompileShaderFromSource("tfx.glsl", "vs_main", GL_VERTEX_SHADER, &vs, tfx_glsl, macro);
|
||||
|
||||
return vs;
|
||||
}
|
||||
|
||||
// Compiles the geometry-shader variant described by `sel`.
//
// A geometry shader is only built when sel.prim > 0 and either sel.iip == 0
// or sel.prim == 3; for every other selector the function returns 0 without
// compiling anything.
//
// Otherwise the iip and prim fields become GS_* #define lines for the
// "gs_main" entry point of tfx.glsl, and the GL object name written by
// CompileShaderFromSource is returned.
GLuint GSDeviceOGL::CompileGS(GSSelector sel)
|
||||
{
|
||||
GLuint gs;
|
||||
// Easy case: this selector combination does not use a geometry shader
|
||||
if(! (sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)))
|
||||
return 0;
|
||||
|
||||
std::string macro = format("#define GS_IIP %d\n", sel.iip)
|
||||
+ format("#define GS_PRIM %d\n", sel.prim);
|
||||
|
||||
CompileShaderFromSource("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, &gs, tfx_glsl, macro);
|
||||
|
||||
return gs;
|
||||
}
|
||||
|
||||
// Convenience overload: creates a sampler object from a packed selector by
// forwarding its ltf, tau and tav fields to the three-argument CreateSampler
// (ltf is passed in the first, "bilinear", position).
// Returns the sampler name produced by that overload.
GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel)
|
||||
{
|
||||
return CreateSampler(sel.ltf, sel.tau, sel.tav);
|
||||
}
|
||||
|
||||
GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
|
||||
{
|
||||
GSDepthStencilOGL* dss = new GSDepthStencilOGL();
|
||||
|
||||
if (dssel.date)
|
||||
{
|
||||
dss->EnableStencil();
|
||||
dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? GL_ZERO : GL_KEEP);
|
||||
}
|
||||
GSDeviceOGL::VSSelector vs_sel;
|
||||
for (uint32 key = 0; key < (1 << 5); key++) {
|
||||
vs_sel.key = key;
|
||||
SetupVS(vs_sel, NULL);
|
||||
|
||||
if(dssel.ztst != ZTST_ALWAYS || dssel.zwe)
|
||||
{
|
||||
static const GLenum ztst[] =
|
||||
{
|
||||
GL_NEVER,
|
||||
GL_ALWAYS,
|
||||
GL_GEQUAL,
|
||||
GL_GREATER
|
||||
};
|
||||
dss->EnableDepth();
|
||||
dss->SetDepth(ztst[dssel.ztst], dssel.zwe);
|
||||
}
|
||||
// Use sane reset value
|
||||
GSSetShader(0);
|
||||
VSSetShader(0);
|
||||
|
||||
return dss;
|
||||
}
|
||||
|
||||
// Builds a new blend-state object for the blend selector `bsel`.
//
// bsel: packed blend configuration (abe, a/b/c/d terms, negative flag and
//       the per-channel write mask wr/wg/wb/wa).
// afix: fixed alpha value; in this function it is only used in the
//       diagnostic message printed for "bogus" blend combinations.
//
// Returns a GSBlendStateOGL allocated with `new`. The function does not keep
// the pointer, so the caller is responsible for it
// (NOTE(review): presumably stored in the m_om_bs cache - confirm at call site).
GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
|
||||
{
|
||||
GSBlendStateOGL* bs = new GSBlendStateOGL();
|
||||
|
||||
if(bsel.abe)
|
||||
{
|
||||
// Row index into m_blendMapD3D9: a, b, c, d combined as base-3 digits
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
|
||||
|
||||
bs->EnableBlend();
|
||||
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
|
||||
|
||||
// Table rows flagged bogus get one factor replaced by GL_ONE and are
// reported on stderr.
if(m_blendMapD3D9[i].bogus == 1)
|
||||
{
|
||||
if (bsel.a == 0)
|
||||
bs->SetRGB(m_blendMapD3D9[i].op, GL_ONE, m_blendMapD3D9[i].dst);
|
||||
else
|
||||
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE);
|
||||
|
||||
const string afixstr = format("%d >> 7", afix);
|
||||
const char *col[3] = {"Cs", "Cd", "0"};
|
||||
const char *alpha[3] = {"As", "Ad", afixstr.c_str()};
|
||||
|
||||
// FIXME, need to investigate OGL capabilities. Maybe for OGL5 ;)
|
||||
fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]);
|
||||
}
|
||||
|
||||
// Not very good but I don't wanna write another 81 row table
|
||||
if(bsel.negative) bs->RevertOp();
|
||||
}
|
||||
|
||||
// The color write mask is applied whether or not blending is enabled
bs->SetMask(bsel.wr, bsel.wg, bsel.wb, bsel.wa);
|
||||
|
||||
return bs;
|
||||
}
|
||||
|
||||
// Compiles the fragment-shader variant described by `sel`.
//
// Each selector field listed below is emitted as a PS_* #define line; the
// resulting macro header is used to compile the "ps_main" entry point of
// tfx.glsl.
//
// Returns the GL object name that CompileShaderFromSource wrote into `ps`.
GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||
{
|
||||
GLuint ps;
|
||||
|
||||
std::string macro = format("#define PS_FST %d\n", sel.fst)
|
||||
+ format("#define PS_WMS %d\n", sel.wms)
|
||||
+ format("#define PS_WMT %d\n", sel.wmt)
|
||||
+ format("#define PS_FMT %d\n", sel.fmt)
|
||||
+ format("#define PS_AEM %d\n", sel.aem)
|
||||
+ format("#define PS_TFX %d\n", sel.tfx)
|
||||
+ format("#define PS_TCC %d\n", sel.tcc)
|
||||
+ format("#define PS_ATST %d\n", sel.atst)
|
||||
+ format("#define PS_FOG %d\n", sel.fog)
|
||||
+ format("#define PS_CLR1 %d\n", sel.clr1)
|
||||
+ format("#define PS_FBA %d\n", sel.fba)
|
||||
+ format("#define PS_AOUT %d\n", sel.aout)
|
||||
+ format("#define PS_LTF %d\n", sel.ltf)
|
||||
+ format("#define PS_COLCLIP %d\n", sel.colclip)
|
||||
+ format("#define PS_DATE %d\n", sel.date)
|
||||
+ format("#define PS_SPRITEHACK %d\n", sel.spritehack)
|
||||
+ format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack)
|
||||
+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler);
|
||||
|
||||
CompileShaderFromSource("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, &ps, tfx_glsl, macro);
|
||||
|
||||
return ps;
|
||||
}
|
||||
|
||||
void GSDeviceOGL::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||
{
|
||||
// *************************************************************
|
||||
// Static
|
||||
// *************************************************************
|
||||
auto i = m_vs.find(sel);
|
||||
GLuint vs = m_vs[sel];
|
||||
|
||||
if(i == m_vs.end())
|
||||
{
|
||||
std::string macro = format("#define VS_BPPZ %d\n", sel.bppz)
|
||||
+ format("#define VS_LOGZ %d\n", sel.logz)
|
||||
+ format("#define VS_TME %d\n", sel.tme)
|
||||
+ format("#define VS_FST %d\n", sel.fst);
|
||||
|
||||
GLuint vs;
|
||||
CompileShaderFromSource("tfx.glsl", "vs_main", GL_VERTEX_SHADER, &vs, tfx_glsl, macro);
|
||||
|
||||
m_vs[sel] = vs;
|
||||
i = m_vs.find(sel);
|
||||
}
|
||||
|
||||
// *************************************************************
|
||||
// Dynamic
|
||||
// *************************************************************
|
||||
if(cb != NULL && m_vs_cb_cache.Update(cb)) {
|
||||
if(m_vs_cb_cache.Update(cb)) {
|
||||
SetUniformBuffer(m_vs_cb);
|
||||
m_vs_cb->upload(cb);
|
||||
}
|
||||
|
||||
VSSetShader(i->second);
|
||||
VSSetShader(vs);
|
||||
}
|
||||
|
||||
void GSDeviceOGL::SetupGS(GSSelector sel)
|
||||
{
|
||||
// *************************************************************
|
||||
// Static
|
||||
// *************************************************************
|
||||
GLuint gs = 0;
|
||||
if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3))
|
||||
{
|
||||
auto i = m_gs.find(sel);
|
||||
GLuint gs = m_gs[sel];
|
||||
|
||||
if(i == m_gs.end()) {
|
||||
std::string macro = format("#define GS_IIP %d\n", sel.iip)
|
||||
+ format("#define GS_PRIM %d\n", sel.prim);
|
||||
|
||||
CompileShaderFromSource("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, &gs, tfx_glsl, macro);
|
||||
|
||||
m_gs[sel] = gs;
|
||||
} else {
|
||||
gs = i->second;
|
||||
}
|
||||
}
|
||||
// *************************************************************
|
||||
// Dynamic
|
||||
// *************************************************************
|
||||
GSSetShader(gs);
|
||||
}
|
||||
|
||||
|
@ -138,29 +219,8 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS
|
|||
GLuint ps;
|
||||
auto i = m_ps.find(sel);
|
||||
|
||||
if (i == m_ps.end())
|
||||
{
|
||||
std::string macro = format("#define PS_FST %d\n", sel.fst)
|
||||
+ format("#define PS_WMS %d\n", sel.wms)
|
||||
+ format("#define PS_WMT %d\n", sel.wmt)
|
||||
+ format("#define PS_FMT %d\n", sel.fmt)
|
||||
+ format("#define PS_AEM %d\n", sel.aem)
|
||||
+ format("#define PS_TFX %d\n", sel.tfx)
|
||||
+ format("#define PS_TCC %d\n", sel.tcc)
|
||||
+ format("#define PS_ATST %d\n", sel.atst)
|
||||
+ format("#define PS_FOG %d\n", sel.fog)
|
||||
+ format("#define PS_CLR1 %d\n", sel.clr1)
|
||||
+ format("#define PS_FBA %d\n", sel.fba)
|
||||
+ format("#define PS_AOUT %d\n", sel.aout)
|
||||
+ format("#define PS_LTF %d\n", sel.ltf)
|
||||
+ format("#define PS_COLCLIP %d\n", sel.colclip)
|
||||
+ format("#define PS_DATE %d\n", sel.date)
|
||||
+ format("#define PS_SPRITEHACK %d\n", sel.spritehack)
|
||||
+ format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack)
|
||||
+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler);
|
||||
|
||||
CompileShaderFromSource("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, &ps, tfx_glsl, macro);
|
||||
|
||||
if (i == m_ps.end()) {
|
||||
ps = CompilePS(sel);
|
||||
m_ps[sel] = ps;
|
||||
} else {
|
||||
ps = i->second;
|
||||
|
@ -183,21 +243,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS
|
|||
ssel.ltf = 0;
|
||||
}
|
||||
|
||||
auto i = m_ps_ss.find(ssel);
|
||||
|
||||
if(i != m_ps_ss.end())
|
||||
{
|
||||
ss0 = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
// *************************************************************
|
||||
// Static
|
||||
// *************************************************************
|
||||
CreateSampler(ss0, ssel.ltf, ssel.tau, ssel.tav);
|
||||
|
||||
m_ps_ss[ssel] = ss0;
|
||||
}
|
||||
ss0 = m_ps_ss[ssel];
|
||||
|
||||
if(sel.fmt >= 3)
|
||||
{
|
||||
|
@ -211,86 +257,26 @@ void GSDeviceOGL::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerS
|
|||
|
||||
void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
|
||||
{
|
||||
auto i = m_om_dss.find(dssel);
|
||||
GSDepthStencilOGL* dss = m_om_dss[dssel];
|
||||
|
||||
// *************************************************************
|
||||
// Static
|
||||
// *************************************************************
|
||||
if (i == m_om_dss.end())
|
||||
{
|
||||
GSDepthStencilOGL* dss = new GSDepthStencilOGL();
|
||||
|
||||
if (dssel.date)
|
||||
{
|
||||
dss->EnableStencil();
|
||||
dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? GL_ZERO : GL_KEEP);
|
||||
}
|
||||
|
||||
if(dssel.ztst != ZTST_ALWAYS || dssel.zwe)
|
||||
{
|
||||
static const GLenum ztst[] =
|
||||
{
|
||||
GL_NEVER,
|
||||
GL_ALWAYS,
|
||||
GL_GEQUAL,
|
||||
GL_GREATER
|
||||
};
|
||||
dss->EnableDepth();
|
||||
dss->SetDepth(ztst[dssel.ztst], dssel.zwe);
|
||||
}
|
||||
|
||||
m_om_dss[dssel] = dss;
|
||||
i = m_om_dss.find(dssel);
|
||||
}
|
||||
|
||||
// *************************************************************
|
||||
// Dynamic
|
||||
// *************************************************************
|
||||
OMSetDepthStencilState(i->second, 1);
|
||||
OMSetDepthStencilState(dss, 1);
|
||||
|
||||
// *************************************************************
|
||||
// Static
|
||||
// *************************************************************
|
||||
auto j = m_om_bs.find(bsel);
|
||||
GSBlendStateOGL* bs;
|
||||
|
||||
if(j == m_om_bs.end())
|
||||
{
|
||||
GSBlendStateOGL* bs = new GSBlendStateOGL();
|
||||
|
||||
if(bsel.abe)
|
||||
{
|
||||
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
|
||||
|
||||
bs->EnableBlend();
|
||||
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
|
||||
|
||||
if(m_blendMapD3D9[i].bogus == 1)
|
||||
{
|
||||
if (bsel.a == 0)
|
||||
bs->SetRGB(m_blendMapD3D9[i].op, GL_ONE, m_blendMapD3D9[i].dst);
|
||||
else
|
||||
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE);
|
||||
|
||||
const string afixstr = format("%d >> 7", afix);
|
||||
const char *col[3] = {"Cs", "Cd", "0"};
|
||||
const char *alpha[3] = {"As", "Ad", afixstr.c_str()};
|
||||
|
||||
// FIXME, need to investigate OGL capabilities. Maybe for OGL5 ;)
|
||||
fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]);
|
||||
}
|
||||
|
||||
// Not very good but I don't wanna write another 81 row table
|
||||
if(bsel.negative) bs->RevertOp();
|
||||
}
|
||||
|
||||
bs->SetMask(bsel.wr, bsel.wg, bsel.wb, bsel.wa);
|
||||
|
||||
bs = CreateBlend(bsel, afix);
|
||||
m_om_bs[bsel] = bs;
|
||||
j = m_om_bs.find(bsel);
|
||||
} else {
|
||||
bs = j->second;
|
||||
}
|
||||
|
||||
// *************************************************************
|
||||
// Dynamic
|
||||
// *************************************************************
|
||||
OMSetBlendState(j->second, (float)(int)afix / 0x80);
|
||||
OMSetBlendState(bs, (float)(int)afix / 0x80);
|
||||
}
|
||||
|
|
|
@ -31,13 +31,13 @@ struct GSInputLayoutOGL {
|
|||
};
|
||||
|
||||
class GSBufferOGL {
|
||||
size_t m_stride;
|
||||
const size_t m_stride;
|
||||
size_t m_start;
|
||||
size_t m_count;
|
||||
size_t m_limit;
|
||||
GLenum m_target;
|
||||
const GLenum m_target;
|
||||
GLuint m_buffer;
|
||||
size_t m_default_size;
|
||||
const bool m_sub_data_config;
|
||||
|
||||
public:
|
||||
GSBufferOGL(GLenum target, size_t stride) :
|
||||
|
@ -46,15 +46,16 @@ class GSBufferOGL {
|
|||
, m_count(0)
|
||||
, m_limit(0)
|
||||
, m_target(target)
|
||||
, m_sub_data_config((bool)theApp.GetConfig("ogl_vertex_subdata", 1))
|
||||
{
|
||||
gl_GenBuffers(1, &m_buffer);
|
||||
// Opengl works best with 1-4MB buffer.
|
||||
m_default_size = 2 * 1024 * 1024 / m_stride;
|
||||
m_limit = 2 * 1024 * 1024 / m_stride;
|
||||
}
|
||||
|
||||
~GSBufferOGL() { gl_DeleteBuffers(1, &m_buffer); }
|
||||
|
||||
void allocate() { allocate(m_default_size); }
|
||||
void allocate() { allocate(m_limit); }
|
||||
|
||||
void allocate(size_t new_limit)
|
||||
{
|
||||
|
@ -68,9 +69,26 @@ class GSBufferOGL {
|
|||
gl_BindBuffer(m_target, m_buffer);
|
||||
}
|
||||
|
||||
void upload(const void* src, uint32 count)
|
||||
void subdata_upload(const void* src, uint32 count)
|
||||
{
|
||||
m_count = count;
|
||||
|
||||
// Current GPU buffer is really too small need to allocate a new one
|
||||
if (m_count > m_limit) {
|
||||
allocate(std::max<int>(m_count * 3 / 2, m_limit));
|
||||
|
||||
} else if (m_count > (m_limit - m_start) ) {
|
||||
// Not enough left free room. Just go back at the beginning
|
||||
m_start = 0;
|
||||
// Orphan the buffer to avoid synchronization
|
||||
allocate(m_limit);
|
||||
}
|
||||
|
||||
gl_BufferSubData(m_target, m_stride * m_start, m_stride * m_count, src);
|
||||
}
|
||||
|
||||
void map_upload(const void* src, uint32 count)
|
||||
{
|
||||
// Upload the data to the buffer
|
||||
void* dst;
|
||||
if (Map(&dst, count)) {
|
||||
// FIXME which one to use
|
||||
|
@ -80,14 +98,16 @@ class GSBufferOGL {
|
|||
}
|
||||
}
|
||||
|
||||
void upload(const void* src, uint32 count)
|
||||
{
|
||||
if (m_sub_data_config) {
|
||||
subdata_upload(src, count);
|
||||
} else {
|
||||
map_upload(src, count);
|
||||
}
|
||||
}
|
||||
|
||||
bool Map(void** pointer, uint32 count ) {
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
GLint b_size = -1;
|
||||
gl_GetBufferParameteriv(m_target, GL_BUFFER_SIZE, &b_size);
|
||||
|
||||
if (b_size <= 0) return false;
|
||||
#endif
|
||||
|
||||
m_count = count;
|
||||
|
||||
// Note: For an explanation of the map flag
|
||||
|
@ -96,7 +116,7 @@ class GSBufferOGL {
|
|||
|
||||
// Current GPU buffer is really too small need to allocate a new one
|
||||
if (m_count > m_limit) {
|
||||
allocate(std::max<int>(m_count * 3 / 2, m_default_size));
|
||||
allocate(std::max<int>(m_count * 3 / 2, m_limit));
|
||||
|
||||
} else if (m_count > (m_limit - m_start) ) {
|
||||
// Not enough left free room. Just go back at the beginning
|
||||
|
@ -113,13 +133,7 @@ class GSBufferOGL {
|
|||
|
||||
// Upload the data to the buffer
|
||||
*pointer = (uint8*) gl_MapBufferRange(m_target, m_stride*m_start, m_stride*m_count, map_flags);
|
||||
//fprintf(stderr, "Map %x from %d to %d\n", *pointer, m_start, m_start+m_count);
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
if (*pointer == NULL) {
|
||||
fprintf(stderr, "CRITICAL ERROR map failed for vb!!!\n");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,7 +46,6 @@
|
|||
|
||||
struct vertex
|
||||
{
|
||||
//vec4 p;
|
||||
vec4 t;
|
||||
vec4 tp;
|
||||
vec4 c;
|
||||
|
@ -69,17 +68,14 @@ layout(location = 0) out vertex VSout;
|
|||
#define VSout_c (VSout.c)
|
||||
#else
|
||||
#ifdef DISABLE_SSO
|
||||
//out vec4 SHADERp;
|
||||
out vec4 SHADERt;
|
||||
out vec4 SHADERtp;
|
||||
out vec4 SHADERc;
|
||||
#else
|
||||
//layout(location = 0) out vec4 SHADERp;
|
||||
layout(location = 0) out vec4 SHADERt;
|
||||
layout(location = 1) out vec4 SHADERtp;
|
||||
layout(location = 2) out vec4 SHADERc;
|
||||
#endif
|
||||
//#define VSout_p SHADERp
|
||||
#define VSout_t SHADERt
|
||||
#define VSout_tp SHADERtp
|
||||
#define VSout_c SHADERc
|
||||
|
@ -99,11 +95,13 @@ layout(std140) uniform cb20
|
|||
layout(std140, binding = 20) uniform cb20
|
||||
#endif
|
||||
{
|
||||
vec4 VertexScale;
|
||||
vec4 VertexOffset;
|
||||
vec2 VertexScale;
|
||||
vec2 VertexOffset;
|
||||
vec2 TextureScale;
|
||||
};
|
||||
|
||||
const float exp_min32 = exp2(-32);
|
||||
|
||||
void vs_main()
|
||||
{
|
||||
uint z;
|
||||
|
@ -119,35 +117,25 @@ void vs_main()
|
|||
// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
|
||||
// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
|
||||
|
||||
vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0);
|
||||
vec4 final_p = p * VertexScale - VertexOffset;
|
||||
// FIXME
|
||||
// FLIP vertically
|
||||
final_p.y *= -1.0f;
|
||||
vec3 p = vec3(i_p, z) - vec3(0.05f, 0.05f, 0.0f);
|
||||
p = p * vec3(VertexScale, exp_min32) - vec3(VertexOffset, 0.0f);
|
||||
|
||||
if(VS_LOGZ == 1)
|
||||
{
|
||||
final_p.z = log2(1.0f + float(z)) / 32.0f;
|
||||
p.z = log2(1.0f + float(z)) / 32.0f;
|
||||
}
|
||||
|
||||
//VSout_p = final_p;
|
||||
gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
|
||||
#if VS_RTCOPY
|
||||
VSout_tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;
|
||||
#endif
|
||||
|
||||
gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
|
||||
|
||||
if(VS_TME != 0)
|
||||
{
|
||||
if(VS_FST != 0)
|
||||
{
|
||||
//VSout_t.xy = i_t * TextureScale;
|
||||
VSout_t.xy = i_uv * TextureScale;
|
||||
VSout_t.w = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
//VSout_t.xy = i_t;
|
||||
VSout_t.xy = i_st;
|
||||
VSout_t.w = i_q;
|
||||
}
|
||||
|
@ -188,7 +176,7 @@ layout(points, max_vertices = 1) out;
|
|||
void gs_main()
|
||||
{
|
||||
for(int i = 0; i < gl_in.length(); i++) {
|
||||
gl_Position = gl_in[i].gl_Position; // FIXME is it useful
|
||||
gl_Position = gl_in[i].gl_Position;
|
||||
GSout = GSin[i];
|
||||
EmitVertex();
|
||||
}
|
||||
|
@ -202,7 +190,7 @@ layout(line_strip, max_vertices = 2) out;
|
|||
void gs_main()
|
||||
{
|
||||
for(int i = 0; i < gl_in.length(); i++) {
|
||||
gl_Position = gl_in[i].gl_Position; // FIXME is it useful
|
||||
gl_Position = gl_in[i].gl_Position;
|
||||
GSout = GSin[i];
|
||||
#if GS_IIP == 0
|
||||
if (i == 0)
|
||||
|
@ -220,7 +208,7 @@ layout(triangle_strip, max_vertices = 3) out;
|
|||
void gs_main()
|
||||
{
|
||||
for(int i = 0; i < gl_in.length(); i++) {
|
||||
gl_Position = gl_in[i].gl_Position; // FIXME is it useful
|
||||
gl_Position = gl_in[i].gl_Position;
|
||||
GSout = GSin[i];
|
||||
#if GS_IIP == 0
|
||||
if (i == 0 || i == 1)
|
||||
|
@ -299,23 +287,19 @@ void gs_main()
|
|||
#ifdef FRAGMENT_SHADER
|
||||
#if __VERSION__ > 140 && !(defined(NO_STRUCT))
|
||||
layout(location = 0) in vertex PSin;
|
||||
//#define PSin_p (PSin.p)
|
||||
#define PSin_t (PSin.t)
|
||||
#define PSin_tp (PSin.tp)
|
||||
#define PSin_c (PSin.c)
|
||||
#else
|
||||
#ifdef DISABLE_SSO
|
||||
in vec4 SHADERp;
|
||||
in vec4 SHADERt;
|
||||
in vec4 SHADERtp;
|
||||
in vec4 SHADERc;
|
||||
#else
|
||||
//layout(location = 0) in vec4 SHADERp;
|
||||
layout(location = 0) in vec4 SHADERt;
|
||||
layout(location = 1) in vec4 SHADERtp;
|
||||
layout(location = 2) in vec4 SHADERc;
|
||||
#endif
|
||||
//#define PSin_p SHADERp
|
||||
#define PSin_t SHADERt
|
||||
#define PSin_tp SHADERtp
|
||||
#define PSin_c SHADERc
|
||||
|
@ -365,10 +349,7 @@ vec4 sample_c(vec2 uv)
|
|||
uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
|
||||
}
|
||||
|
||||
// FIXME I'm not sure it is a good solution to flip texture
|
||||
return texture(TextureSampler, uv);
|
||||
//FIXME another way to FLIP vertically
|
||||
//return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );
|
||||
}
|
||||
|
||||
vec4 sample_p(float u)
|
||||
|
@ -698,12 +679,8 @@ vec4 ps_color()
|
|||
|
||||
void ps_main()
|
||||
{
|
||||
//FIXME
|
||||
vec4 c = ps_color();
|
||||
|
||||
// FIXME: I'm not sure about the value of others field
|
||||
// output.c1 = c.a * 2; // used for alpha blending
|
||||
|
||||
float alpha = c.a * 2;
|
||||
|
||||
if(PS_AOUT != 0) // 16 bit output
|
||||
|
|
|
@ -74,7 +74,6 @@ static const char* tfx_glsl =
|
|||
"\n"
|
||||
"struct vertex\n"
|
||||
"{\n"
|
||||
" //vec4 p;\n"
|
||||
" vec4 t;\n"
|
||||
" vec4 tp;\n"
|
||||
" vec4 c;\n"
|
||||
|
@ -97,17 +96,14 @@ static const char* tfx_glsl =
|
|||
"#define VSout_c (VSout.c)\n"
|
||||
"#else\n"
|
||||
"#ifdef DISABLE_SSO\n"
|
||||
"//out vec4 SHADERp;\n"
|
||||
"out vec4 SHADERt;\n"
|
||||
"out vec4 SHADERtp;\n"
|
||||
"out vec4 SHADERc;\n"
|
||||
"#else\n"
|
||||
"//layout(location = 0) out vec4 SHADERp;\n"
|
||||
"layout(location = 0) out vec4 SHADERt;\n"
|
||||
"layout(location = 1) out vec4 SHADERtp;\n"
|
||||
"layout(location = 2) out vec4 SHADERc;\n"
|
||||
"#endif\n"
|
||||
"//#define VSout_p SHADERp\n"
|
||||
"#define VSout_t SHADERt\n"
|
||||
"#define VSout_tp SHADERtp\n"
|
||||
"#define VSout_c SHADERc\n"
|
||||
|
@ -127,11 +123,13 @@ static const char* tfx_glsl =
|
|||
"layout(std140, binding = 20) uniform cb20\n"
|
||||
"#endif\n"
|
||||
"{\n"
|
||||
" vec4 VertexScale;\n"
|
||||
" vec4 VertexOffset;\n"
|
||||
" vec2 VertexScale;\n"
|
||||
" vec2 VertexOffset;\n"
|
||||
" vec2 TextureScale;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"const float exp_min32 = exp2(-32);\n"
|
||||
"\n"
|
||||
"void vs_main()\n"
|
||||
"{\n"
|
||||
" uint z;\n"
|
||||
|
@ -147,35 +145,25 @@ static const char* tfx_glsl =
|
|||
" // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel\n"
|
||||
" // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133\n"
|
||||
"\n"
|
||||
" vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0); \n"
|
||||
" vec4 final_p = p * VertexScale - VertexOffset;\n"
|
||||
" // FIXME\n"
|
||||
" // FLIP vertically\n"
|
||||
" final_p.y *= -1.0f;\n"
|
||||
" vec3 p = vec3(i_p, z) - vec3(0.05f, 0.05f, 0.0f);\n"
|
||||
" p = p * vec3(VertexScale, exp_min32) - vec3(VertexOffset, 0.0f);\n"
|
||||
"\n"
|
||||
" if(VS_LOGZ == 1)\n"
|
||||
" {\n"
|
||||
" final_p.z = log2(1.0f + float(z)) / 32.0f;\n"
|
||||
" p.z = log2(1.0f + float(z)) / 32.0f;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" //VSout_p = final_p;\n"
|
||||
" gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n"
|
||||
"#if VS_RTCOPY\n"
|
||||
" VSout_tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" gl_Position = vec4(p, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n"
|
||||
"\n"
|
||||
" if(VS_TME != 0)\n"
|
||||
" {\n"
|
||||
" if(VS_FST != 0)\n"
|
||||
" {\n"
|
||||
" //VSout_t.xy = i_t * TextureScale;\n"
|
||||
" VSout_t.xy = i_uv * TextureScale;\n"
|
||||
" VSout_t.w = 1.0f;\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" //VSout_t.xy = i_t;\n"
|
||||
" VSout_t.xy = i_st;\n"
|
||||
" VSout_t.w = i_q;\n"
|
||||
" }\n"
|
||||
|
@ -216,7 +204,7 @@ static const char* tfx_glsl =
|
|||
"void gs_main()\n"
|
||||
"{\n"
|
||||
" for(int i = 0; i < gl_in.length(); i++) {\n"
|
||||
" gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n"
|
||||
" gl_Position = gl_in[i].gl_Position;\n"
|
||||
" GSout = GSin[i];\n"
|
||||
" EmitVertex();\n"
|
||||
" }\n"
|
||||
|
@ -230,7 +218,7 @@ static const char* tfx_glsl =
|
|||
"void gs_main()\n"
|
||||
"{\n"
|
||||
" for(int i = 0; i < gl_in.length(); i++) {\n"
|
||||
" gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n"
|
||||
" gl_Position = gl_in[i].gl_Position;\n"
|
||||
" GSout = GSin[i];\n"
|
||||
"#if GS_IIP == 0\n"
|
||||
" if (i == 0)\n"
|
||||
|
@ -248,7 +236,7 @@ static const char* tfx_glsl =
|
|||
"void gs_main()\n"
|
||||
"{\n"
|
||||
" for(int i = 0; i < gl_in.length(); i++) {\n"
|
||||
" gl_Position = gl_in[i].gl_Position; // FIXME is it useful\n"
|
||||
" gl_Position = gl_in[i].gl_Position;\n"
|
||||
" GSout = GSin[i];\n"
|
||||
"#if GS_IIP == 0\n"
|
||||
" if (i == 0 || i == 1)\n"
|
||||
|
@ -327,23 +315,19 @@ static const char* tfx_glsl =
|
|||
"#ifdef FRAGMENT_SHADER\n"
|
||||
"#if __VERSION__ > 140 && !(defined(NO_STRUCT))\n"
|
||||
"layout(location = 0) in vertex PSin;\n"
|
||||
"//#define PSin_p (PSin.p)\n"
|
||||
"#define PSin_t (PSin.t)\n"
|
||||
"#define PSin_tp (PSin.tp)\n"
|
||||
"#define PSin_c (PSin.c)\n"
|
||||
"#else\n"
|
||||
"#ifdef DISABLE_SSO\n"
|
||||
"in vec4 SHADERp;\n"
|
||||
"in vec4 SHADERt;\n"
|
||||
"in vec4 SHADERtp;\n"
|
||||
"in vec4 SHADERc;\n"
|
||||
"#else\n"
|
||||
"//layout(location = 0) in vec4 SHADERp;\n"
|
||||
"layout(location = 0) in vec4 SHADERt;\n"
|
||||
"layout(location = 1) in vec4 SHADERtp;\n"
|
||||
"layout(location = 2) in vec4 SHADERc;\n"
|
||||
"#endif\n"
|
||||
"//#define PSin_p SHADERp\n"
|
||||
"#define PSin_t SHADERt\n"
|
||||
"#define PSin_tp SHADERtp\n"
|
||||
"#define PSin_c SHADERc\n"
|
||||
|
@ -393,10 +377,7 @@ static const char* tfx_glsl =
|
|||
" uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // FIXME I'm not sure it is a good solution to flip texture\n"
|
||||
" return texture(TextureSampler, uv);\n"
|
||||
" //FIXME another way to FLIP vertically\n"
|
||||
" //return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 sample_p(float u)\n"
|
||||
|
@ -726,12 +707,8 @@ static const char* tfx_glsl =
|
|||
"\n"
|
||||
"void ps_main()\n"
|
||||
"{\n"
|
||||
" //FIXME\n"
|
||||
" vec4 c = ps_color();\n"
|
||||
"\n"
|
||||
" // FIXME: I'm not sure about the value of others field\n"
|
||||
" // output.c1 = c.a * 2; // used for alpha blending\n"
|
||||
"\n"
|
||||
" float alpha = c.a * 2;\n"
|
||||
"\n"
|
||||
" if(PS_AOUT != 0) // 16 bit output\n"
|
||||
|
|
Loading…
Reference in New Issue