diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 245e6a2a..56bb3cb5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,8 @@ add_library(core STATIC FIFO.h GBACart.cpp GPU.cpp + GPU_OpenGL.cpp + GPU_OpenGL_shaders.h GPU2D.cpp GPU3D.cpp GPU3D_OpenGL.cpp diff --git a/src/GPU.cpp b/src/GPU.cpp index 42bfc566..993086eb 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -985,6 +985,8 @@ void StartScanline(u32 line) GPU2D_A->VBlank(); GPU2D_B->VBlank(); GPU3D::VBlank(); + + if (Accelerated) GLCompositor::RenderFrame(); } else if (VCount == 144) { diff --git a/src/GPU.h b/src/GPU.h index 661a7d91..e85a5b4c 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -422,6 +422,20 @@ void SetDispStat(u32 cpu, u16 val); void SetVCount(u16 val); +namespace GLCompositor +{ + +bool Init(); +void DeInit(); +void Reset(); + +void UpdateDisplaySettings(); + +void RenderFrame(); +void BindOutputTexture(); + +} + } #endif diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 1c6cf0cf..6f950b73 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -728,6 +728,8 @@ u32 GPU2D::ColorComposite(int i, u32 val1, u32 val2) case 3: return ColorBrightnessDown(val1, EVY); case 4: return ColorBlend5(val1, val2); } + + return val1; } diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 4f146ae7..e687e37d 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -616,7 +616,7 @@ int InitRenderer(bool hasGL) if (!GLRenderer::Init()) renderer = 0; } - +printf("renderer: %d\n", renderer); if (renderer == 0) SoftRenderer::Init(); Renderer = renderer; diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index b1bcaa11..dcc4b6b2 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -29,6 +29,8 @@ namespace GPU3D namespace GLRenderer { +using namespace OpenGL; + // GL version requirements // * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) // * UBO: 3.1 @@ -142,7 +144,7 @@ bool BuildRenderShader(u32 flags, const char* vs, const char* fs) strcpy(&fsbuf[headerlen], kRenderFSCommon); strcpy(&fsbuf[headerlen + fsclen], fs); - bool ret = OpenGL_BuildShaderProgram(vsbuf, fsbuf, RenderShader[flags], shadername); + bool ret = OpenGL::BuildShaderProgram(vsbuf, fsbuf, RenderShader[flags], shadername); delete[] vsbuf; delete[] fsbuf; @@ -158,7 +160,7 @@ bool BuildRenderShader(u32 flags, const char* vs, const char* fs) glBindFragDataLocation(prog, 0, "oColor"); glBindFragDataLocation(prog, 1, "oAttr"); - if (!OpenGL_LinkShaderProgram(RenderShader[flags])) + if (!OpenGL::LinkShaderProgram(RenderShader[flags])) return false; GLint uni_id = glGetUniformBlockIndex(prog, "uConfig"); @@ -202,14 +204,14 @@ bool Init() glClearDepth(1.0); - if (!OpenGL_BuildShaderProgram(kClearVS, kClearFS, ClearShaderPlain, "ClearShader")) + if (!OpenGL::BuildShaderProgram(kClearVS, kClearFS, ClearShaderPlain, "ClearShader")) return false; glBindAttribLocation(ClearShaderPlain[2], 0, "vPosition"); glBindFragDataLocation(ClearShaderPlain[2], 0, "oColor"); glBindFragDataLocation(ClearShaderPlain[2], 1, "oAttr"); - if (!OpenGL_LinkShaderProgram(ClearShaderPlain)) + if (!OpenGL::LinkShaderProgram(ClearShaderPlain)) return false; ClearUniformLoc[0] = glGetUniformLocation(ClearShaderPlain[2], "uColor"); @@ -237,15 +239,15 @@ bool Init() kRenderVS_W, kRenderFS_WSM)) return false; - if (!OpenGL_BuildShaderProgram(kFinalPassVS, kFinalPassEdgeFS, FinalPassEdgeShader, "FinalPassEdgeShader")) + if (!OpenGL::BuildShaderProgram(kFinalPassVS, kFinalPassEdgeFS, FinalPassEdgeShader, "FinalPassEdgeShader")) return false; - if (!OpenGL_BuildShaderProgram(kFinalPassVS, kFinalPassFogFS, FinalPassFogShader, "FinalPassFogShader")) + if (!OpenGL::BuildShaderProgram(kFinalPassVS, kFinalPassFogFS, FinalPassFogShader, "FinalPassFogShader")) return false; glBindAttribLocation(FinalPassEdgeShader[2], 0, "vPosition"); glBindFragDataLocation(FinalPassEdgeShader[2], 0, "oColor"); - if (!OpenGL_LinkShaderProgram(FinalPassEdgeShader)) + if (!OpenGL::LinkShaderProgram(FinalPassEdgeShader)) return false; uni_id = glGetUniformBlockIndex(FinalPassEdgeShader[2], "uConfig"); @@ -261,7 +263,7 @@ bool Init() glBindAttribLocation(FinalPassFogShader[2], 0, "vPosition"); glBindFragDataLocation(FinalPassFogShader[2], 0, "oColor"); - if (!OpenGL_LinkShaderProgram(FinalPassFogShader)) + if (!OpenGL::LinkShaderProgram(FinalPassFogShader)) return false; uni_id = glGetUniformBlockIndex(FinalPassFogShader[2], "uConfig"); @@ -371,11 +373,19 @@ bool Init() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB5_A1, 1024, 48, 0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, NULL); + if (!GPU::GLCompositor::Init()) + { + // TODO: clean up things? fail more gracefully?? + return false; + } + return true; } void DeInit() { + GPU::GLCompositor::DeInit(); + glDeleteTextures(1, &TexMemID); glDeleteTextures(1, &TexPalMemID); @@ -392,12 +402,13 @@ void DeInit() for (int i = 0; i < 16; i++) { if (!RenderShader[i][2]) continue; - OpenGL_DeleteShaderProgram(RenderShader[i]); + OpenGL::DeleteShaderProgram(RenderShader[i]); } } void Reset() { + GPU::GLCompositor::Reset(); } void UpdateDisplaySettings() @@ -480,6 +491,8 @@ void UpdateDisplaySettings() //glLineWidth(scale); //glLineWidth(1.5); + + GPU::GLCompositor::UpdateDisplaySettings(); } diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp new file mode 100644 index 00000000..c9d31f10 --- /dev/null +++ b/src/GPU_OpenGL.cpp @@ -0,0 +1,207 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include +#include +#include "NDS.h" +#include "GPU.h" +#include "Config.h" +#include "OpenGLSupport.h" +#include "GPU_OpenGL_shaders.h" + +namespace GPU +{ +namespace GLCompositor +{ + +using namespace OpenGL; + +int Scale; +int ScreenH, ScreenW; + +GLuint CompShader[1][3]; +GLuint CompScaleLoc[1]; + +GLuint CompVertexBufferID; +GLuint CompVertexArrayID; +float CompVertices[2 * 3*2 * 2]; // position + +GLuint CompScreenInputTex; +GLuint CompScreenOutputTex; +GLuint CompScreenOutputFB; + + +bool Init() +{ + if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Nearest, CompShader[0], "CompositorShader")) + //if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Linear, CompShader[0], "CompositorShader")) + //if (!OpenGL::BuildShaderProgram(kCompositorVS_xBRZ, kCompositorFS_xBRZ, CompShader[0], "CompositorShader")) + return false; + + for (int i = 0; i < 1; i++) + { + GLint uni_id; + + glBindAttribLocation(CompShader[i][2], 0, "vPosition"); + glBindFragDataLocation(CompShader[i][2], 0, "oColor"); + + if (!OpenGL::LinkShaderProgram(CompShader[i])) + return false; + + CompScaleLoc[i] = glGetUniformLocation(CompShader[i][2], "u3DScale"); + + glUseProgram(CompShader[i][2]); + uni_id = glGetUniformLocation(CompShader[i][2], "ScreenTex"); + glUniform1i(uni_id, 0); + uni_id = glGetUniformLocation(CompShader[i][2], "_3DTex"); + glUniform1i(uni_id, 1); + } + +#define SETVERTEX(i, x, y) \ + CompVertices[2*(i) + 0] = x; \ + CompVertices[2*(i) + 1] = y; + + // top screen + SETVERTEX(0, -1, 1); + SETVERTEX(1, 1, 0); + SETVERTEX(2, 1, 1); + SETVERTEX(3, -1, 1); + SETVERTEX(4, -1, 0); + SETVERTEX(5, 1, 0); + + // bottom screen + SETVERTEX(6, -1, 0); + SETVERTEX(7, 1, -1); + SETVERTEX(8, 1, 0); + SETVERTEX(9, -1, 0); + SETVERTEX(10, -1, -1); + SETVERTEX(11, 1, -1); + +#undef SETVERTEX + + glGenBuffers(1, &CompVertexBufferID); + glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); + glBufferData(GL_ARRAY_BUFFER, sizeof(CompVertices), CompVertices, GL_STATIC_DRAW); + + glGenVertexArrays(1, &CompVertexArrayID); + glBindVertexArray(CompVertexArrayID); + glEnableVertexAttribArray(0); // position + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 2*4, (void*)(0)); + + glGenFramebuffers(1, &CompScreenOutputFB); + + glGenTextures(1, &CompScreenInputTex); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 256*3 + 1, 192*2, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, NULL); + + glGenTextures(1, &CompScreenOutputTex); + glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + return true; +} + +void DeInit() +{ + glDeleteFramebuffers(1, &CompScreenOutputFB); + glDeleteTextures(1, &CompScreenInputTex); + glDeleteTextures(1, &CompScreenOutputTex); + + glDeleteVertexArrays(1, &CompVertexArrayID); + glDeleteBuffers(1, &CompVertexBufferID); + + for (int i = 0; i < 1; i++) + OpenGL::DeleteShaderProgram(CompShader[i]); +} + +void Reset() +{ +} + + +void UpdateDisplaySettings() +{ + int scale = Config::GL_ScaleFactor; + + Scale = scale; + ScreenW = 256 * scale; + ScreenH = 384 * scale; + + glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + GLenum fbassign[] = {GL_COLOR_ATTACHMENT0}; + glBindFramebuffer(GL_FRAMEBUFFER, CompScreenOutputFB); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CompScreenOutputTex, 0); + glDrawBuffers(1, fbassign); +} + + +void RenderFrame() +{ + glBindFramebuffer(GL_FRAMEBUFFER, CompScreenOutputFB); + + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glViewport(0, 0, ScreenW, ScreenH); + + // TODO: select more shaders (filtering, etc) + OpenGL::UseShaderProgram(CompShader[0]); + glUniform1ui(CompScaleLoc[0], Scale); + + //if (RunningSomething) + { + int frontbuf = GPU::FrontBuffer; + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); + + if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 1, 192, GL_RGBA_INTEGER, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256*3 + 1, 192, GL_RGBA_INTEGER, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + } + + glActiveTexture(GL_TEXTURE1); + GPU3D::GLRenderer::SetupAccelFrame(); + + glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); + glBindVertexArray(CompVertexArrayID); + glDrawArrays(GL_TRIANGLES, 0, 4*3); + } +} + +void BindOutputTexture() +{ + glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex); +} + +} +} diff --git a/src/GPU_OpenGL_shaders.h b/src/GPU_OpenGL_shaders.h new file mode 100644 index 00000000..ec975edb --- /dev/null +++ b/src/GPU_OpenGL_shaders.h @@ -0,0 +1,867 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef GPU_OPENGL_SHADERS_H +#define GPU_OPENGL_SHADERS_H + +const char* kCompositorVS = R"(#version 140 + +in vec2 vPosition; + +smooth out vec2 fTexcoord; + +void main() +{ + vec4 fpos; + fpos.xy = vPosition; + fpos.z = 0.0; + fpos.w = 1.0; + + gl_Position = fpos; + fTexcoord = (vPosition + vec2(1.0, 1.0)) * (vec2(256.0, 384.0) / 2.0); +} +)"; + +const char* kCompositorFS_Nearest = R"(#version 140 + +uniform uint u3DScale; + +uniform usampler2D ScreenTex; +uniform sampler2D _3DTex; + +smooth in vec2 fTexcoord; + +out vec4 oColor; + +void main() +{ + ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); + + ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); + int dispmode = mbright.b & 0x3; + + if (dispmode == 1) + { + ivec4 val1 = pixel; + ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0)); + ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0)); + + int compmode = val3.a & 0xF; + int eva, evb, evy; + + if (compmode == 4) + { + // 3D on top, blending + + float xpos = val3.r + fract(fTexcoord.x); + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra + * vec4(63,63,63,31)); + + if (_3dpix.a > 0) + { + eva = (_3dpix.a & 0x1F) + 1; + evb = 32 - eva; + + val1 = ((_3dpix * eva) + (val1 * evb)) >> 5; + if (eva <= 16) val1 += ivec4(1,1,1,0); + val1 = min(val1, 0x3F); + } + else + val1 = val2; + } + else if (compmode == 1) + { + // 3D on bottom, blending + + float xpos = val3.r + fract(fTexcoord.x); + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra + * vec4(63,63,63,31)); + + if (_3dpix.a > 0) + { + eva = val3.g; + evb = val3.b; + + val1 = ((val1 * eva) + (_3dpix * evb)) >> 4; + val1 = min(val1, 0x3F); + } + else + val1 = val2; + } + else if (compmode <= 3) + { + // 3D on top, normal/fade + + float xpos = val3.r + fract(fTexcoord.x); + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra + * vec4(63,63,63,31)); + + if (_3dpix.a > 0) + { + evy = val3.g; + + val1 = _3dpix; + if (compmode == 2) val1 += ((ivec4(0x3F,0x3F,0x3F,0) - val1) * evy) >> 4; + else if (compmode == 3) val1 -= (val1 * evy) >> 4; + } + else + val1 = val2; + } + + pixel = val1; + } + + if (dispmode != 0) + { + int brightmode = mbright.g >> 6; + if (brightmode == 1) + { + // up + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel += ((ivec4(0x3F,0x3F,0x3F,0) - pixel) * evy) >> 4; + } + else if (brightmode == 2) + { + // down + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel -= (pixel * evy) >> 4; + } + } + + pixel.rgb <<= 2; + pixel.rgb |= (pixel.rgb >> 6); + + // TODO: filters + + oColor = vec4(vec3(pixel.rgb) / 255.0, 1.0); +} +)"; + + + +const char* kCompositorFS_Linear = R"(#version 140 + +uniform uint u3DScale; + +uniform usampler2D ScreenTex; +uniform sampler2D _3DTex; + +smooth in vec2 fTexcoord; + +out vec4 oColor; + +ivec4 Get3DPixel(vec2 pos) +{ + return ivec4(texelFetch(_3DTex, ivec2(pos*u3DScale), 0).bgra + * vec4(63,63,63,31)); +} + +ivec4 GetFullPixel(ivec4 val1, ivec4 val2, ivec4 val3, ivec4 _3dpix) +{ + int compmode = val3.a & 0xF; + int eva, evb, evy; + + if (compmode == 4) + { + // 3D on top, blending + + if (_3dpix.a > 0) + { + eva = (_3dpix.a & 0x1F) + 1; + evb = 32 - eva; + + val1 = ((_3dpix * eva) + (val1 * evb)) >> 5; + if (eva <= 16) val1 += ivec4(1,1,1,0); + val1 = min(val1, 0x3F); + } + else + val1 = val2; + } + else if (compmode == 1) + { + // 3D on bottom, blending + + if (_3dpix.a > 0) + { + eva = val3.g; + evb = val3.b; + + val1 = ((val1 * eva) + (_3dpix * evb)) >> 4; + val1 = min(val1, 0x3F); + } + else + val1 = val2; + } + else if (compmode <= 3) + { + // 3D on top, normal/fade + + if (_3dpix.a > 0) + { + evy = val3.g; + + val1 = _3dpix; + if (compmode == 2) val1 += ((ivec4(0x3F,0x3F,0x3F,0) - val1) * evy) >> 4; + else if (compmode == 3) val1 -= (val1 * evy) >> 4; + } + else + val1 = val2; + } + + return val1; +} + +ivec4 imix(ivec4 a, ivec4 b, float x) +{ + return ivec4(vec4(a)*(1-x) + vec4(b)*x); +} + +void main() +{ + ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); + + ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); + int dispmode = mbright.b & 0x3; + + if (dispmode == 1) + { + ivec4 val1 = pixel; + ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0)); + ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0)); + + float xfract = fract(fTexcoord.x); + float yfract = fract(fTexcoord.y); + + float xpos = val3.r + xfract; + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = Get3DPixel(vec2(xpos,ypos)); + + ivec4 p00 = GetFullPixel(val1, val2, val3, _3dpix); + + int xdisp = 1 - int(step(255, fTexcoord.x)); + int ydisp = 1 - int(step(191, ypos)); + + ivec4 p01 = GetFullPixel(ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+0 ,0), 0)), + ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+256,0), 0)), + ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+512,0), 0)), + _3dpix); + + ivec4 p10 = GetFullPixel(ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(0+0 ,ydisp), 0)), + ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(0+256,ydisp), 0)), + ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(0+512,ydisp), 0)), + _3dpix); + + ivec4 p11 = GetFullPixel(ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+0 ,ydisp), 0)), + ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+256,ydisp), 0)), + ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+512,ydisp), 0)), + _3dpix); + + ivec4 pa = imix(p00, p01, xfract); + ivec4 pb = imix(p10, p11, xfract); + + pixel = imix(pa, pb, yfract); + } + + if (dispmode != 0) + { + int brightmode = mbright.g >> 6; + if (brightmode == 1) + { + // up + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel += ((ivec4(0x3F,0x3F,0x3F,0) - pixel) * evy) >> 4; + } + else if (brightmode == 2) + { + // down + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel -= (pixel * evy) >> 4; + } + } + + pixel.rgb <<= 2; + pixel.rgb |= (pixel.rgb >> 6); + + // TODO: filters + + oColor = vec4(vec3(pixel.rgb) / 255.0, 1.0); +} +)"; + + + + + + +// HUGE TEST ZONE ARRLGD + +const char* kCompositorVS_xBRZ = R"(#version 140 + +#define BLEND_NONE 0 +#define BLEND_NORMAL 1 +#define BLEND_DOMINANT 2 +#define LUMINANCE_WEIGHT 1.0 +#define EQUAL_COLOR_TOLERANCE 30.0/255.0 +#define STEEP_DIRECTION_THRESHOLD 2.2 +#define DOMINANT_DIRECTION_THRESHOLD 3.6 + +#if __VERSION__ >= 130 +#define COMPAT_VARYING out +#define COMPAT_ATTRIBUTE in +#define COMPAT_TEXTURE texture +#else +#define COMPAT_VARYING varying +#define COMPAT_ATTRIBUTE attribute +#define COMPAT_TEXTURE texture2D +#endif + +#ifdef GL_ES +#define COMPAT_PRECISION mediump +#else +#define COMPAT_PRECISION +#endif + +COMPAT_ATTRIBUTE vec2 vPosition; +COMPAT_VARYING vec4 TEX0; +COMPAT_VARYING vec4 t1; +COMPAT_VARYING vec4 t2; +COMPAT_VARYING vec4 t3; +COMPAT_VARYING vec4 t4; +COMPAT_VARYING vec4 t5; +COMPAT_VARYING vec4 t6; +COMPAT_VARYING vec4 t7; + +uniform COMPAT_PRECISION int FrameDirection; +uniform COMPAT_PRECISION int FrameCount; +uniform COMPAT_PRECISION vec2 OutputSize; +uniform COMPAT_PRECISION vec2 TextureSize; +uniform COMPAT_PRECISION vec2 InputSize; + +// vertex compatibility #defines +#define vTexCoord TEX0.xy +#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize +#define outsize vec4(OutputSize, 1.0 / OutputSize) + +void main() +{ + vec4 fpos; + fpos.xy = vPosition; + fpos.z = 0.0; + fpos.w = 1.0; + + gl_Position = fpos; + vec2 TexCoord = (vPosition + vec2(1.0, 1.0)) * (vec2(256.0, 384.0) / 2.0); + + + //gl_Position = MVPMatrix * VertexCoord; + //COL0 = COLOR; + TEX0.xy = TexCoord.xy; + vec2 ps = vec2(1,1);//vec2(SourceSize.z, SourceSize.w); + float dx = ps.x; + float dy = ps.y; + + // A1 B1 C1 + // A0 A B C C4 + // D0 D E F F4 + // G0 G H I I4 + // G5 H5 I5 + + t1 = vTexCoord.xxxy + vec4( -dx, 0.0, dx,-2.0*dy); // A1 B1 C1 + t2 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, -dy); // A B C + t3 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, 0.0); // D E F + t4 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, dy); // G H I + t5 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, 2.0*dy); // G5 H5 I5 + t6 = vTexCoord.xyyy + vec4(-2.0*dx,-dy, 0.0, dy); // A0 D0 G0 + t7 = vTexCoord.xyyy + vec4( 2.0*dx,-dy, 0.0, dy); // C4 F4 I4 +} +)"; + +const char* kCompositorFS_xBRZ = R"(#version 140 + +#define BLEND_NONE 0 +#define BLEND_NORMAL 1 +#define BLEND_DOMINANT 2 +#define LUMINANCE_WEIGHT 1.0 +#define EQUAL_COLOR_TOLERANCE 30.0/255.0 +#define STEEP_DIRECTION_THRESHOLD 2.2 +#define DOMINANT_DIRECTION_THRESHOLD 3.6 + +#if __VERSION__ >= 130 +#define COMPAT_VARYING in +//#define COMPAT_TEXTURE texture +#define FragColor oColor +#else +#define COMPAT_VARYING varying +#define FragColor gl_FragColor +//#define COMPAT_TEXTURE texture2D +#endif + +#ifdef GL_ES +#ifdef GL_FRAGMENT_PRECISION_HIGH +precision highp float; +#else +precision mediump float; +#endif +#define COMPAT_PRECISION mediump +#else +#define COMPAT_PRECISION +#endif + +uniform uint u3DScale; + +uniform usampler2D ScreenTex; +uniform sampler2D _3DTex; + +smooth in vec2 fTexcoord; + +out vec4 oColor; + +//uniform COMPAT_PRECISION vec2 OutputSize; +//uniform COMPAT_PRECISION vec2 TextureSize; +#define TextureSize vec2(256,384) +//uniform COMPAT_PRECISION vec2 InputSize; +//uniform sampler2D Texture; +#define Texture 1312 +COMPAT_VARYING vec4 TEX0; +COMPAT_VARYING vec4 t1; +COMPAT_VARYING vec4 t2; +COMPAT_VARYING vec4 t3; +COMPAT_VARYING vec4 t4; +COMPAT_VARYING vec4 t5; +COMPAT_VARYING vec4 t6; +COMPAT_VARYING vec4 t7; + +// fragment compatibility #defines +#define Source Texture +#define vTexCoord TEX0.xy + +#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize +#define outsize vec4(OutputSize, 1.0 / OutputSize) + + const float one_sixth = 1.0 / 6.0; + const float two_sixth = 2.0 / 6.0; + const float four_sixth = 4.0 / 6.0; + const float five_sixth = 5.0 / 6.0; + +vec4 Get2DPixel(vec2 texcoord, int level) +{ + ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(texcoord) + ivec2(level*256,0), 0)); + + return vec4(pixel) / vec4(63.0, 63.0, 63.0, 31.0); +} + +ivec4 Get3DPixel(vec2 pos) +{ + return ivec4(texelFetch(_3DTex, ivec2(pos*u3DScale), 0).bgra + * vec4(63,63,63,31)); +} + +float reduce(const vec3 color) +{ + return dot(color, vec3(65536.0, 256.0, 1.0)); +} + +float DistYCbCr(const vec3 pixA, const vec3 pixB) +{ + const vec3 w = vec3(0.2627, 0.6780, 0.0593); + const float scaleB = 0.5 / (1.0 - w.b); + const float scaleR = 0.5 / (1.0 - w.r); + vec3 diff = pixA - pixB; + float Y = dot(diff, w); + float Cb = scaleB * (diff.b - Y); + float Cr = scaleR * (diff.r - Y); + + return sqrt( ((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr) ); +} + +bool IsPixEqual(const vec3 pixA, const vec3 pixB) +{ + return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE); +} + +bool IsBlendingNeeded(const ivec4 blend) +{ + return any(notEqual(blend, ivec4(BLEND_NONE))); +} + +//--------------------------------------- +// Input Pixel Mapping: --|21|22|23|-- +// 19|06|07|08|09 +// 18|05|00|01|10 +// 17|04|03|02|11 +// --|15|14|13|-- +// +// Output Pixel Mapping: 20|21|22|23|24|25 +// 19|06|07|08|09|26 +// 18|05|00|01|10|27 +// 17|04|03|02|11|28 +// 16|15|14|13|12|29 +// 35|34|33|32|31|30 + +ivec4 GetFiltered2DPixel(int level) +{ + vec2 f = fract(vTexCoord.xy);// * SourceSize.xy); + + //--------------------------------------- + // Input Pixel Mapping: 20|21|22|23|24 + // 19|06|07|08|09 + // 18|05|00|01|10 + // 17|04|03|02|11 + // 16|15|14|13|12 + + vec3 src[25]; + + src[21] = Get2DPixel(t1.xw, level).rgb; + src[22] = Get2DPixel(t1.yw, level).rgb; + src[23] = Get2DPixel(t1.zw, level).rgb; + src[ 6] = Get2DPixel(t2.xw, level).rgb; + src[ 7] = Get2DPixel(t2.yw, level).rgb; + src[ 8] = Get2DPixel(t2.zw, level).rgb; + src[ 5] = Get2DPixel(t3.xw, level).rgb; + src[ 0] = Get2DPixel(t3.yw, level).rgb; + src[ 1] = Get2DPixel(t3.zw, level).rgb; + src[ 4] = Get2DPixel(t4.xw, level).rgb; + src[ 3] = Get2DPixel(t4.yw, level).rgb; + src[ 2] = Get2DPixel(t4.zw, level).rgb; + src[15] = Get2DPixel(t5.xw, level).rgb; + src[14] = Get2DPixel(t5.yw, level).rgb; + src[13] = Get2DPixel(t5.zw, level).rgb; + src[19] = Get2DPixel(t6.xy, level).rgb; + src[18] = Get2DPixel(t6.xz, level).rgb; + src[17] = Get2DPixel(t6.xw, level).rgb; + src[ 9] = Get2DPixel(t7.xy, level).rgb; + src[10] = Get2DPixel(t7.xz, level).rgb; + src[11] = Get2DPixel(t7.xw, level).rgb; + + float v[9]; + v[0] = reduce(src[0]); + v[1] = reduce(src[1]); + v[2] = reduce(src[2]); + v[3] = reduce(src[3]); + v[4] = reduce(src[4]); + v[5] = reduce(src[5]); + v[6] = reduce(src[6]); + v[7] = reduce(src[7]); + v[8] = reduce(src[8]); + + ivec4 blendResult = ivec4(BLEND_NONE); + + // Preprocess corners + // Pixel Tap Mapping: --|--|--|--|-- + // --|--|07|08|-- + // --|05|00|01|10 + // --|04|03|02|11 + // --|--|14|13|-- + // Corner (1, 1) + if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) + { + float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); + float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; + blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|--|--|-- + // --|06|07|--|-- + // 18|05|00|01|-- + // 17|04|03|02|-- + // --|15|14|--|-- + // Corner (0, 1) + if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) + { + float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); + float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; + blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|--|22|23|-- + // --|06|07|08|09 + // --|05|00|01|10 + // --|--|03|02|-- + // --|--|--|--|-- + // Corner (1, 0) + if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) + { + float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); + float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; + blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + // Pixel Tap Mapping: --|21|22|--|-- + // 19|06|07|08|-- + // 18|05|00|01|-- + // --|04|03|--|-- + // --|--|--|--|-- + // Corner (0, 0) + if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) + { + float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); + float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); + bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; + blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; + } + + vec3 dst[16]; + dst[ 0] = src[0]; + dst[ 1] = src[0]; + dst[ 2] = src[0]; + dst[ 3] = src[0]; + dst[ 4] = src[0]; + dst[ 5] = src[0]; + dst[ 6] = src[0]; + dst[ 7] = src[0]; + dst[ 8] = src[0]; + dst[ 9] = src[0]; + dst[10] = src[0]; + dst[11] = src[0]; + dst[12] = src[0]; + dst[13] = src[0]; + dst[14] = src[0]; + dst[15] = src[0]; + + // Scale pixel + if (IsBlendingNeeded(blendResult) == true) + { + float dist_01_04 = DistYCbCr(src[1], src[4]); + float dist_03_08 = DistYCbCr(src[3], src[8]); + bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]); + bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]); + bool needBlend = (blendResult[2] != BLEND_NONE); + bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT || + ((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[0], src[2]) == false) ) == false ); + + vec3 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; + dst[ 2] = mix(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[10] = mix(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[11] = mix(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[12] = mix(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[13] = mix(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[14] = mix(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); + + dist_01_04 = DistYCbCr(src[7], src[2]); + dist_03_08 = DistYCbCr(src[1], src[6]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]); + needBlend = (blendResult[1] != BLEND_NONE); + doLineBlend = ( blendResult[1] >= BLEND_DOMINANT || + !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); + + blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; + dst[ 1] = mix(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[ 7] = mix(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[ 8] = mix(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 9] = mix(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[10] = mix(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[11] = mix(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); + + dist_01_04 = DistYCbCr(src[5], src[8]); + dist_03_08 = DistYCbCr(src[7], src[4]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]); + needBlend = (blendResult[0] != BLEND_NONE); + doLineBlend = ( blendResult[0] >= BLEND_DOMINANT || + !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || + (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || + (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); + + blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; + dst[ 0] = mix(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[ 4] = mix(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[ 5] = mix(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 6] = mix(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[ 7] = mix(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 8] = mix(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); + + + dist_01_04 = DistYCbCr(src[3], src[6]); + dist_03_08 = DistYCbCr(src[5], src[2]); + haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]); + haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]); + needBlend = (blendResult[3] != BLEND_NONE); + doLineBlend = ( blendResult[3] >= BLEND_DOMINANT || + !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || + (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || + (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); + + blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; + dst[ 3] = mix(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); + dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); + dst[13] = mix(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); + dst[14] = mix(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[15] = mix(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); + dst[ 4] = mix(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); + dst[ 5] = mix(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); + dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); + } + + vec3 res = mix( mix( mix( mix(dst[ 6], dst[ 7], step(0.25, f.x)), mix(dst[ 8], dst[ 9], step(0.75, f.x)), step(0.50, f.x)), + mix( mix(dst[ 5], dst[ 0], step(0.25, f.x)), mix(dst[ 1], dst[10], step(0.75, f.x)), step(0.50, f.x)), step(0.25, f.y)), + mix( mix( mix(dst[ 4], dst[ 3], step(0.25, f.x)), mix(dst[ 2], dst[11], step(0.75, f.x)), step(0.50, f.x)), + mix( mix(dst[15], dst[14], step(0.25, f.x)), mix(dst[13], dst[12], step(0.75, f.x)), step(0.50, f.x)), step(0.75, f.y)), + step(0.50, f.y)); + + return ivec4(res * vec3(63,63,63), 0); +} + + +void main() +{ + vec2 fTexcoord = vTexCoord.xy; + + ivec4 pixel;// = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); + + ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); + int dispmode = mbright.b & 0x3; + + if (dispmode == 1) + { + ivec4 val1;// = pixel; + //ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0)); + ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0)); + + int compmode = val3.a & 0xF; + int eva, evb, evy; + + float xpos = val3.r + fract(fTexcoord.x); + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = Get3DPixel(vec2(xpos, ypos)); + + if (compmode == 4) + { + // 3D on top, blending + + if (_3dpix.a > 0) + { + eva = (_3dpix.a & 0x1F) + 1; + if (eva == 32) + { + val1 = _3dpix; + } + else + { + evb = 32 - eva; + + val1 = GetFiltered2DPixel(0); + + val1 = ((_3dpix * eva) + (val1 * evb)) >> 5; + if (eva <= 16) val1 += ivec4(1,1,1,0); + val1 = min(val1, 0x3F); + } + } + else + val1 = GetFiltered2DPixel(1); + } + else if (compmode == 1) + { + // 3D on bottom, blending + + if (_3dpix.a > 0) + { + eva = val3.g; + evb = val3.b; + + val1 = GetFiltered2DPixel(0); + + val1 = ((val1 * eva) + (_3dpix * evb)) >> 4; + val1 = min(val1, 0x3F); + } + else + val1 = GetFiltered2DPixel(1); + } + else if (compmode <= 3) + { + // 3D on top, normal/fade + + if (_3dpix.a > 0) + { + evy = val3.g; + + val1 = _3dpix; + if (compmode == 2) val1 += ((ivec4(0x3F,0x3F,0x3F,0) - val1) * evy) >> 4; + else if (compmode == 3) val1 -= (val1 * evy) >> 4; + } + else + val1 = GetFiltered2DPixel(1); + } + else + val1 = GetFiltered2DPixel(0); + + pixel = val1; + } + else + { + pixel = GetFiltered2DPixel(0); + } + + if (dispmode != 0) + { + int brightmode = mbright.g >> 6; + if (brightmode == 1) + { + // up + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel += ((ivec4(0x3F,0x3F,0x3F,0) - pixel) * evy) >> 4; + } + else if (brightmode == 2) + { + // down + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel -= (pixel * evy) >> 4; + } + } + + pixel.rgb <<= 2; + pixel.rgb |= (pixel.rgb >> 6); + + FragColor = vec4(vec3(pixel.rgb) / 255.0, 1.0); +} +)"; + + + + + + +#endif // GPU_OPENGL_SHADERS_H diff --git a/src/OpenGLSupport.cpp b/src/OpenGLSupport.cpp index f91af9bf..27b14803 100644 --- a/src/OpenGLSupport.cpp +++ b/src/OpenGLSupport.cpp @@ -19,18 +19,20 @@ #include "OpenGLSupport.h" +namespace OpenGL +{ DO_PROCLIST(DECLPROC); -bool OpenGL_Init() +bool Init() { DO_PROCLIST(LOADPROC); return true; } -bool OpenGL_BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name) +bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name) { int len; int res; @@ -89,7 +91,7 @@ bool OpenGL_BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, cons return true; } -bool OpenGL_LinkShaderProgram(GLuint* ids) +bool LinkShaderProgram(GLuint* ids) { int res; @@ -115,14 +117,16 @@ bool OpenGL_LinkShaderProgram(GLuint* ids) return true; } -void OpenGL_DeleteShaderProgram(GLuint* ids) +void DeleteShaderProgram(GLuint* ids) { glDeleteShader(ids[0]); glDeleteShader(ids[1]); glDeleteProgram(ids[2]); } -void OpenGL_UseShaderProgram(GLuint* ids) +void UseShaderProgram(GLuint* ids) { glUseProgram(ids[2]); } + +} diff --git a/src/OpenGLSupport.h b/src/OpenGLSupport.h index 5f92580b..360d2156 100644 --- a/src/OpenGLSupport.h +++ b/src/OpenGLSupport.h @@ -21,6 +21,8 @@ #include #include + +// TODO: different includes for each platform #include #include @@ -45,23 +47,11 @@ // if you need more OpenGL functions, add them to the macronator here -// TODO: handle conditionally loading certain functions for different GL versions - -#ifndef __WIN32__ - -#define DO_PROCLIST_1_3(func) - -#else - -#define DO_PROCLIST_1_3(func) \ - func(GLACTIVETEXTURE, glActiveTexture); \ - func(GLBLENDCOLOR, glBlendColor); \ - -#endif #define DO_PROCLIST(func) \ - DO_PROCLIST_1_3(func) \ + func(GLACTIVETEXTURE, glActiveTexture); \ + func(GLBLENDCOLOR, glBlendColor); \ \ func(GLGENFRAMEBUFFERS, glGenFramebuffers); \ func(GLDELETEFRAMEBUFFERS, glDeleteFramebuffers); \ @@ -122,14 +112,18 @@ func(GLGETSTRINGI, glGetStringi); \ +namespace OpenGL +{ + DO_PROCLIST(DECLPROC_EXT); +bool Init(); -bool OpenGL_Init(); +bool BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name); +bool LinkShaderProgram(GLuint* ids); +void DeleteShaderProgram(GLuint* ids); +void UseShaderProgram(GLuint* ids); -bool OpenGL_BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name); -bool OpenGL_LinkShaderProgram(GLuint* ids); -void OpenGL_DeleteShaderProgram(GLuint* ids); -void OpenGL_UseShaderProgram(GLuint* ids); +} #endif // OPENGLSUPPORT_H diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index dd838d75..6b256e02 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -56,6 +56,7 @@ char* EmuDirectory; void emuStop(); +void* oglGetProcAddress(const char* proc); namespace Platform @@ -418,7 +419,7 @@ void Semaphore_Post(void* sema) void* GL_GetProcAddress(const char* proc) { - return NULL;//uiGLGetProcAddress(proc); + return oglGetProcAddress(proc); } diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index bb08a87e..096b91ce 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -45,6 +45,7 @@ #include "NDS.h" #include "GBACart.h" +#include "OpenGLSupport.h" #include "GPU.h" #include "SPU.h" #include "Wifi.h" @@ -258,6 +259,54 @@ EmuThread::EmuThread(QObject* parent) : QThread(parent) connect(this, SIGNAL(windowEmuPause()), mainWindow->actPause, SLOT(trigger())); connect(this, SIGNAL(windowEmuReset()), mainWindow->actReset, SLOT(trigger())); connect(this, SIGNAL(screenLayoutChange()), mainWindow->panel, SLOT(onScreenLayoutChanged())); + + initOpenGL(); +} + +void EmuThread::initOpenGL() +{ + QOpenGLContext* windowctx = mainWindow->getOGLContext(); + QSurfaceFormat format = windowctx->format(); + + oglSurface = new QOffscreenSurface(); + oglSurface->setFormat(format); + oglSurface->create(); + if (!oglSurface->isValid()) + { + // TODO handle this! + printf("oglSurface shat itself :(\n"); + delete oglSurface; + return; + } + + oglContext = new QOpenGLContext();//oglSurface); + oglContext->setFormat(oglSurface->format()); + oglContext->setShareContext(windowctx); + if (!oglContext->create()) + { + // TODO handle this! + printf("oglContext shat itself :(\n"); + delete oglContext; + delete oglSurface; + return; + } + + oglContext->moveToThread(this); +} + +void deinitOpenGL() +{ + // TODO!! +} + +void* oglGetProcAddress(const char* proc) +{ + return emuThread->oglGetProcAddress(proc); +} + +void* EmuThread::oglGetProcAddress(const char* proc) +{ + return (void*)oglContext->getProcAddress(proc); } void EmuThread::run() @@ -279,7 +328,11 @@ void EmuThread::run() } else*/ { - GPU3D::InitRenderer(false); + //GPU3D::InitRenderer(false); + bool res = oglContext->makeCurrent(oglSurface); + printf("good? %d\n", res); + OpenGL::Init(); + GPU3D::InitRenderer(res); } Input::Init(); @@ -755,6 +808,11 @@ void ScreenPanelGL::initializeGL() { initializeOpenGLFunctions(); + const GLubyte* renderer = glGetString(GL_RENDERER); // get renderer string + const GLubyte* version = glGetString(GL_VERSION); // version as a string + printf("OpenGL: renderer: %s\n", renderer); + printf("OpenGL: version: %s\n", version); + glClearColor(0, 0, 0, 1); screenShader = new QOpenGLShaderProgram(this); @@ -828,14 +886,24 @@ void ScreenPanelGL::paintGL() int frontbuf = GPU::FrontBuffer; glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, screenTexture); - if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + if (true) { - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + // hardware-accelerated render + GPU::GLCompositor::BindOutputTexture(); + } + else + { + // regular render + glBindTexture(GL_TEXTURE_2D, screenTexture); + + if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 192, GL_RGBA, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + } } GLint filter = Config::ScreenFilter ? GL_LINEAR : GL_NEAREST; @@ -1141,6 +1209,13 @@ MainWindow::~MainWindow() { } +QOpenGLContext* MainWindow::getOGLContext() +{ + // TODO: check whether we can actually pull this! + QOpenGLWidget* glpanel = (QOpenGLWidget*)panel; + return glpanel->context(); +} + void MainWindow::resizeEvent(QResizeEvent* event) { int w = event->size().width(); @@ -1755,6 +1830,13 @@ int main(int argc, char** argv) } #endif + QSurfaceFormat format; + format.setDepthBufferSize(24); + format.setStencilBufferSize(8); + format.setVersion(3, 2); + format.setProfile(QSurfaceFormat::CoreProfile); + QSurfaceFormat::setDefaultFormat(format); + audioSync = SDL_CreateCond(); audioSyncLock = SDL_CreateMutex(); diff --git a/src/frontend/qt_sdl/main.h b/src/frontend/qt_sdl/main.h index 8a8c0417..67c93d03 100644 --- a/src/frontend/qt_sdl/main.h +++ b/src/frontend/qt_sdl/main.h @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -40,6 +41,11 @@ class EmuThread : public QThread public: explicit EmuThread(QObject* parent = nullptr); + void initOpenGL(); + void deinitOpenGL(); + + void* oglGetProcAddress(const char* proc); + void changeWindowTitle(char* title); // to be called from the UI thread @@ -67,6 +73,9 @@ private: volatile int EmuStatus; int PrevEmuStatus; int EmuRunning; + + QOffscreenSurface* oglSurface; + QOpenGLContext* oglContext; }; @@ -161,6 +170,8 @@ public: explicit MainWindow(QWidget* parent = nullptr); ~MainWindow(); + QOpenGLContext* getOGLContext(); + protected: void resizeEvent(QResizeEvent* event) override;