From c81bcccadc9ac8394ba8d4a836d7c954dd528751 Mon Sep 17 00:00:00 2001 From: Arisotura Date: Thu, 16 May 2019 16:27:45 +0200 Subject: [PATCH] BAHAHAHAHAHAHAHAA --- melonDS.cbp | 1 + src/GPU.cpp | 40 ++- src/GPU.h | 2 +- src/GPU2D.cpp | 175 ++++++---- src/GPU2D.h | 4 +- src/GPU3D.cpp | 4 +- src/GPU3D.h | 9 +- src/GPU3D_OpenGL43.cpp | 641 +--------------------------------- src/GPU3D_OpenGL43_shaders.h | 645 +++++++++++++++++++++++++++++++++++ src/GPU3D_Soft.cpp | 7 +- src/libui_sdl/main.cpp | 19 +- 11 files changed, 816 insertions(+), 731 deletions(-) create mode 100644 src/GPU3D_OpenGL43_shaders.h diff --git a/melonDS.cbp b/melonDS.cbp index ff01ebfb..0bdac7fd 100644 --- a/melonDS.cbp +++ b/melonDS.cbp @@ -107,6 +107,7 @@ + diff --git a/src/GPU.cpp b/src/GPU.cpp index f4e9fd46..1d073f53 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -74,6 +74,7 @@ u32 VRAMMap_ARM7[2]; int FrontBuffer; u32* Framebuffer[2][2]; int FBScale[2]; +bool Accelerated; GPU2D* GPU2D_A; GPU2D* GPU2D_B; @@ -88,9 +89,8 @@ bool Init() FrontBuffer = 0; Framebuffer[0][0] = NULL; Framebuffer[0][1] = NULL; Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL; - FBScale[0] = -1; FBScale[1] = -1; - //SetFramebufferScale(1); - SetFramebufferScale(1, 1); + FBScale[0] = -1; FBScale[1] = -1; Accelerated = false; + SetDisplaySettings(0, 0, false); return true; } @@ -247,13 +247,15 @@ void AssignFramebuffers() } } -void SetFramebufferScale(int top, int bottom) -{ - if (top != FBScale[0]) +void SetDisplaySettings(int topscale, int bottomscale, bool accel) +{accel=true; + if (topscale != FBScale[0] || accel != Accelerated) { - FBScale[0] = top; + FBScale[0] = accel ? 0 : topscale; - int fbsize = (256 * 192) << (FBScale[0] * 2); + int fbsize; + if (accel) fbsize = 256*3 * 192; + else fbsize = (256 * 192) << (FBScale[0] * 2); if (Framebuffer[0][0]) delete[] Framebuffer[0][0]; if (Framebuffer[1][0]) delete[] Framebuffer[1][0]; Framebuffer[0][0] = new u32[fbsize]; @@ -266,21 +268,23 @@ void SetFramebufferScale(int top, int bottom) if (NDS::PowerControl9 & (1<<15)) { GPU2D_A->SetFramebuffer(Framebuffer[backbuf][0]); - GPU2D_A->SetScale(FBScale[0]); - GPU3D::SetScale(FBScale[0]); + GPU2D_A->SetDisplaySettings(FBScale[0], accel); + GPU3D::SetDisplaySettings(topscale, accel); } else { GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); - GPU2D_B->SetScale(FBScale[0]); + GPU2D_B->SetDisplaySettings(FBScale[0], accel); } } - if (bottom != FBScale[1]) + if (bottomscale != FBScale[1] || accel != Accelerated) { - FBScale[1] = bottom; + FBScale[1] = accel ? 0 : bottomscale; - int fbsize = (256 * 192) << (FBScale[1] * 2); + int fbsize; + if (accel) fbsize = 256*3 * 192; + else fbsize = (256 * 192) << (FBScale[1] * 2); if (Framebuffer[0][1]) delete[] Framebuffer[0][1]; if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; Framebuffer[0][1] = new u32[fbsize]; @@ -293,15 +297,17 @@ void SetFramebufferScale(int top, int bottom) if (NDS::PowerControl9 & (1<<15)) { GPU2D_B->SetFramebuffer(Framebuffer[backbuf][1]); - GPU2D_B->SetScale(FBScale[1]); + GPU2D_B->SetDisplaySettings(FBScale[1], accel); } else { GPU2D_A->SetFramebuffer(Framebuffer[backbuf][1]); - GPU2D_A->SetScale(FBScale[1]); - GPU3D::SetScale(FBScale[1]); + GPU2D_A->SetDisplaySettings(FBScale[1], accel); + GPU3D::SetDisplaySettings(bottomscale, accel); } } + + Accelerated = accel; } diff --git a/src/GPU.h b/src/GPU.h index 2e47fc47..cfa8d7d8 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -75,7 +75,7 @@ void Stop(); void DoSavestate(Savestate* file); -void SetFramebufferScale(int top, int bottom); +void SetDisplaySettings(int topscale, int bottomscale, bool accel); void MapVRAM_AB(u32 bank, u8 cnt); diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 3a99964a..34c17ff6 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -82,7 +82,6 @@ GPU2D::GPU2D(u32 num) { Num = num; - SetScale(0); } GPU2D::~GPU2D() @@ -216,12 +215,16 @@ void GPU2D::SetFramebuffer(u32* buf) Framebuffer = buf; } -void GPU2D::SetScale(int scale) +void GPU2D::SetDisplaySettings(int scale, bool accel) { + if (accel) scale = 0; + Accelerated = accel; + LineScale = scale; LineStride = 256 << (scale*2); - if (scale == 1) DrawPixel = DrawPixel_2x; + if (Accelerated) DrawPixel = DrawPixel_Accel; + else if (scale == 1) DrawPixel = DrawPixel_2x; else if (scale == 2) DrawPixel = DrawPixel_4x; else DrawPixel = DrawPixel_1x; } @@ -623,7 +626,8 @@ u32 GPU2D::ColorBrightnessDown(u32 val, u32 factor) void GPU2D::DrawScanline(u32 line) { - u32* dst = &Framebuffer[LineStride * line]; + int stride = Accelerated ? (256*3) : LineStride; + u32* dst = &Framebuffer[stride * line]; int n3dline = line; line = GPU::VCount; @@ -652,7 +656,7 @@ void GPU2D::DrawScanline(u32 line) u32 dispmode = DispCnt >> 16; dispmode &= (Num ? 0x1 : 0x3); - if (Num == 0) + if (Num == 0 && !Accelerated) _3DLine = GPU3D::GetLine(n3dline); // always render regular graphics @@ -663,13 +667,13 @@ void GPU2D::DrawScanline(u32 line) case 0: // screen off { for (int i = 0; i < LineStride; i++) - dst[i] = 0xFF3F3F3F; + dst[i] = 0x003F3F3F; } break; case 1: // regular display { - for (int i = 0; i < LineStride; i+=2) + for (int i = 0; i < stride; i+=2) *(u64*)&dst[i] = *(u64*)&BGOBJLine[i]; } break; @@ -783,6 +787,8 @@ void GPU2D::DrawScanline(u32 line) DoCapture(line, capwidth); } + if (Accelerated) return; + // master brightness if (dispmode != 0) { @@ -861,6 +867,8 @@ void GPU2D::DoCapture(u32 line, u32 width) u16* dst = (u16*)GPU::VRAM[dstvram]; u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); + // TODO: handle 3D in accelerated mode!! + u32* srcA; if (CaptureCnt & (1<<24)) srcA = _3DLine; @@ -1272,88 +1280,91 @@ void GPU2D::DrawScanline_Mode1(u32 line) // color special effects // can likely be optimized - u32 bldcnteffect = (BlendCnt >> 6) & 0x3; - - for (int i = 0; i < LineStride; i++) + if (!Accelerated) { - int j = (i >> LineScale) & 0xFF; + u32 bldcnteffect = (BlendCnt >> 6) & 0x3; - u32 val1 = BGOBJLine[i]; - u32 val2 = BGOBJLine[4096+i]; - - u32 coloreffect, eva, evb; - - u32 flag1 = val1 >> 24; - u32 flag2 = val2 >> 24; - - u32 target2; - if (flag2 & 0x80) target2 = 0x1000; - else if (flag2 & 0x40) target2 = 0x0100; - else target2 = flag2 << 8; - - if ((flag1 & 0x80) && (BlendCnt & target2)) + for (int i = 0; i < LineStride; i++) { - // sprite blending + int j = (i >> LineScale) & 0xFF; - coloreffect = 1; + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[4096+i]; - if (flag1 & 0x40) + u32 coloreffect, eva, evb; + + u32 flag1 = val1 >> 24; + u32 flag2 = val2 >> 24; + + u32 target2; + if (flag2 & 0x80) target2 = 0x1000; + else if (flag2 & 0x40) target2 = 0x0100; + else target2 = flag2 << 8; + + if ((flag1 & 0x80) && (BlendCnt & target2)) { - eva = flag1 & 0x1F; - evb = 16 - eva; - } - else - { - eva = EVA; - evb = EVB; - } - } - else if ((flag1 & 0x40) && (BlendCnt & target2)) - { - // 3D layer blending + // sprite blending - BGOBJLine[i] = ColorBlend5(val1, val2); - continue; - } - else - { - if (flag1 & 0x80) flag1 = 0x10; - else if (flag1 & 0x40) flag1 = 0x01; + coloreffect = 1; - if ((BlendCnt & flag1) && (WindowMask[j] & 0x20)) - { - if ((bldcnteffect == 1) && (BlendCnt & target2)) + if (flag1 & 0x40) + { + eva = flag1 & 0x1F; + evb = 16 - eva; + } + else { - coloreffect = 1; eva = EVA; evb = EVB; } - else if (bldcnteffect >= 2) - coloreffect = bldcnteffect; + } + else if ((flag1 & 0x40) && (BlendCnt & target2)) + { + // 3D layer blending + + BGOBJLine[i] = ColorBlend5(val1, val2); + continue; + } + else + { + if (flag1 & 0x80) flag1 = 0x10; + else if (flag1 & 0x40) flag1 = 0x01; + + if ((BlendCnt & flag1) && (WindowMask[j] & 0x20)) + { + if ((bldcnteffect == 1) && (BlendCnt & target2)) + { + coloreffect = 1; + eva = EVA; + evb = EVB; + } + else if (bldcnteffect >= 2) + coloreffect = bldcnteffect; + else + coloreffect = 0; + } else coloreffect = 0; } - else - coloreffect = 0; - } - switch (coloreffect) - { - case 0: - BGOBJLine[i] = val1; - break; + switch (coloreffect) + { + case 0: + BGOBJLine[i] = val1; + break; - case 1: - BGOBJLine[i] = ColorBlend4(val1, val2, eva, evb); - break; + case 1: + BGOBJLine[i] = ColorBlend4(val1, val2, eva, evb); + break; - case 2: - BGOBJLine[i] = ColorBrightnessUp(val1, EVY); - break; + case 2: + BGOBJLine[i] = ColorBrightnessUp(val1, EVY); + break; - case 3: - BGOBJLine[i] = ColorBrightnessDown(val1, EVY); - break; + case 3: + BGOBJLine[i] = ColorBrightnessDown(val1, EVY); + break; + } } } @@ -1429,6 +1440,17 @@ void GPU2D::DrawPixel_4x(u32* dst, u16 color, u32 flag) *(u64*)(dst+3072+2) = val; } +void GPU2D::DrawPixel_Accel(u32* dst, u16 color, u32 flag) +{ + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + *(dst+512) = *(dst+256); + *(dst+256) = *dst; + *dst = r | (g << 8) | (b << 16) | flag; +} + void GPU2D::DrawBG_3D() { u16 xoff = BGXPos[0]; @@ -1445,7 +1467,18 @@ void GPU2D::DrawBG_3D() iend -= (xoff & 0xFF); } - if (LineScale == 1) + if (Accelerated) + { + for (; i < iend; i++) + { + if (!(WindowMask[i] & 0x01)) continue; + + BGOBJLine[i+512] = BGOBJLine[i+256]; + BGOBJLine[i+256] = BGOBJLine[i]; + BGOBJLine[i] = 0x40000000; // 3D-layer placeholder + } + } + else if (LineScale == 1) { for (; i < iend; i++) { diff --git a/src/GPU2D.h b/src/GPU2D.h index aca1f7e4..fc420a51 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -31,7 +31,7 @@ public: void SetEnabled(bool enable) { Enabled = enable; } void SetFramebuffer(u32* buf); - void SetScale(int scale); + void SetDisplaySettings(int scale, bool accel); u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -71,6 +71,7 @@ private: u32 LineStride; u32 LineScale; + bool Accelerated; u32 BGOBJLine[1024*4 * 2]; u32* _3DLine; @@ -136,6 +137,7 @@ private: static void DrawPixel_1x(u32* dst, u16 color, u32 flag); static void DrawPixel_2x(u32* dst, u16 color, u32 flag); static void DrawPixel_4x(u32* dst, u16 color, u32 flag); + static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); void (*DrawPixel)(u32* dst, u16 color, u32 flag); void DrawBG_3D(); diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 8c0588d2..61629a74 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -607,9 +607,9 @@ void SetEnabled(bool geometry, bool rendering) if (!rendering) ResetRenderingState(); } -void SetScale(int scale) +void SetDisplaySettings(int scale, bool accel) { - GLRenderer43::SetScale(scale); + GLRenderer43::SetDisplaySettings(scale, accel); } diff --git a/src/GPU3D.h b/src/GPU3D.h index 68bc696f..2fe5beeb 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -97,7 +97,7 @@ void Reset(); void DoSavestate(Savestate* file); void SetEnabled(bool geometry, bool rendering); -void SetScale(int scale); +void SetDisplaySettings(int scale, bool accel); void ExecuteCommand(); @@ -110,6 +110,7 @@ void VCount144(); void VBlank(); void VCount215(); u32* GetLine(int line); +void SetupAccelFrame(); void WriteToGXFIFO(u32 val); @@ -127,13 +128,14 @@ bool Init(); void DeInit(); void Reset(); -void SetScale(int scale); +void SetDisplaySettings(int scale, bool accel); void SetupRenderThread(); void VCount144(); void RenderFrame(); u32* GetLine(int line); +void SetupAccelFrame(); } @@ -144,11 +146,12 @@ bool Init(); void DeInit(); void Reset(); -void SetScale(int scale); +void SetDisplaySettings(int scale, bool accel); void VCount144(); void RenderFrame(); u32* GetLine(int line); +void SetupAccelFrame(); } diff --git a/src/GPU3D_OpenGL43.cpp b/src/GPU3D_OpenGL43.cpp index e76db7f0..6db9a183 100644 --- a/src/GPU3D_OpenGL43.cpp +++ b/src/GPU3D_OpenGL43.cpp @@ -21,6 +21,7 @@ #include "NDS.h" #include "GPU.h" #include "OpenGLSupport.h" +#include "GPU3D_OpenGL43_shaders.h" namespace GPU3D { @@ -33,631 +34,6 @@ namespace GLRenderer43 // * UBO: 3.1 // * glMemoryBarrier: 4.2 -// TODO: consider other way to handle uniforms (UBO?) - -#define kShaderHeader "#version 430" - - -const char* kClearVS = kShaderHeader R"( - -layout(location=0) in vec2 vPosition; - -layout(location=1) uniform uint uDepth; - -void main() -{ - float fdepth = (float(uDepth) / 8388608.0) - 1.0; - gl_Position = vec4(vPosition, fdepth, 1.0); -} -)"; - -const char* kClearFS = kShaderHeader R"( - -layout(location=0) uniform uvec4 uColor; -layout(location=2) uniform uint uOpaquePolyID; -layout(location=3) uniform uint uFogFlag; - -layout(location=0) out vec4 oColor; -layout(location=1) out uvec3 oAttr; - -void main() -{ - oColor = vec4(uColor).bgra / 31.0; - oAttr.r = 0; - oAttr.g = uOpaquePolyID; - oAttr.b = 0; -} -)"; - - -const char* kRenderVSCommon = R"( - -layout(std140, binding=0) uniform uConfig -{ - vec2 uScreenSize; - uint uDispCnt; - vec4 uToonColors[32]; -}; - -layout(location=0) in uvec4 vPosition; -layout(location=1) in uvec4 vColor; -layout(location=2) in ivec2 vTexcoord; -layout(location=3) in uvec3 vPolygonAttr; - -smooth out vec4 fColor; -smooth out vec2 fTexcoord; -flat out uvec3 fPolygonAttr; -)"; - -const char* kRenderFSCommon = R"( - -layout(binding=0) uniform usampler2D TexMem; -layout(binding=1) uniform sampler2D TexPalMem; - -layout(std140, binding=0) uniform uConfig -{ - vec2 uScreenSize; - uint uDispCnt; - vec4 uToonColors[32]; -}; - -smooth in vec4 fColor; -smooth in vec2 fTexcoord; -flat in uvec3 fPolygonAttr; - -layout(location=0) out vec4 oColor; -layout(location=1) out uvec3 oAttr; - -int TexcoordWrap(int c, int maxc, uint mode) -{ - if ((mode & (1<<0)) != 0) - { - if ((mode & (1<<2)) != 0 && (c & maxc) != 0) - return (maxc-1) - (c & (maxc-1)); - else - return (c & (maxc-1)); - } - else - return clamp(c, 0, maxc-1); -} - -vec4 TextureFetch_A3I5(ivec2 addr, ivec4 st, uint wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x); - uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - - pixel.a = (pixel.r & 0xE0); - pixel.a = (pixel.a >> 3) + (pixel.a >> 6); - pixel.r &= 0x1F; - - addr.y = (addr.y << 3) + int(pixel.r); - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, float(pixel.a)/31.0); -} - -vec4 TextureFetch_I2(ivec2 addr, ivec4 st, uint wrapmode, float alpha0) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x) >> 2; - uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - pixel.r >>= (2 * (st.x & 3)); - pixel.r &= 0x03; - - addr.y = (addr.y << 2) + int(pixel.r); - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, max(step(1,pixel.r),alpha0)); -} - -vec4 TextureFetch_I4(ivec2 addr, ivec4 st, uint wrapmode, float alpha0) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x) >> 1; - uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - if ((st.x & 1) != 0) pixel.r >>= 4; - else pixel.r &= 0x0F; - - addr.y = (addr.y << 3) + int(pixel.r); - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, max(step(1,pixel.r),alpha0)); -} - -vec4 TextureFetch_I8(ivec2 addr, ivec4 st, uint wrapmode, float alpha0) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x); - uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - - addr.y = (addr.y << 3) + int(pixel.r); - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, max(step(1,pixel.r),alpha0)); -} - -vec4 TextureFetch_Compressed(ivec2 addr, ivec4 st, uint wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y & 0x3FC) * (st.z>>2)) + (st.x & 0x3FC) + (st.y & 0x3); - uvec4 p = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - uint val = (p.r >> (2 * (st.x & 0x3))) & 0x3; - - int slot1addr = 0x20000 + ((addr.x & 0x1FFFC) >> 1); - if (addr.x >= 0x40000) slot1addr += 0x10000; - - uint palinfo; - p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0); - palinfo = p.r; - slot1addr++; - p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0); - palinfo |= (p.r << 8); - - addr.y = (addr.y << 3) + ((int(palinfo) & 0x3FFF) << 1); - palinfo >>= 14; - - if (val == 0) - { - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } - else if (val == 1) - { - addr.y++; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } - else if (val == 2) - { - if (palinfo == 1) - { - vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - addr.y++; - vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4((color0.rgb + color1.rgb) / 2.0, 1.0); - } - else if (palinfo == 3) - { - vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - addr.y++; - vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4((color0.rgb*5.0 + color1.rgb*3.0) / 8.0, 1.0); - } - else - { - addr.y += 2; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } - } - else - { - if (palinfo == 2) - { - addr.y += 3; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } - else if (palinfo == 3) - { - vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - addr.y++; - vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4((color0.rgb*3.0 + color1.rgb*5.0) / 8.0, 1.0); - } - else - { - return vec4(0.0); - } - } -} - -vec4 TextureFetch_A5I3(ivec2 addr, ivec4 st, uint wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x); - uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - - pixel.a = (pixel.r & 0xF8) >> 3; - pixel.r &= 0x07; - - addr.y = (addr.y << 3) + int(pixel.r); - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, float(pixel.a)/31.0); -} - -vec4 TextureFetch_Direct(ivec2 addr, ivec4 st, uint wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x) << 1; - uvec4 pixelL = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - addr.x++; - uvec4 pixelH = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); - - vec4 color; - color.r = float(pixelL.r & 0x1F) / 31.0; - color.g = float((pixelL.r >> 5) | ((pixelH.r & 0x03) << 3)) / 31.0; - color.b = float((pixelH.r & 0x7C) >> 2) / 31.0; - color.a = float(pixelH.r >> 7); - - return color; -} - -vec4 TextureLookup_Nearest(vec2 st) -{ - uint attr = fPolygonAttr.y; - uint paladdr = fPolygonAttr.z; - - float alpha0; - if ((attr & (1<<29)) != 0) alpha0 = 0.0; - else alpha0 = 1.0; - - int tw = 8 << int((attr >> 20) & 0x7); - int th = 8 << int((attr >> 23) & 0x7); - ivec4 st_full = ivec4(ivec2(st), tw, th); - - ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr)); - uint wrapmode = attr >> 16; - - uint type = (attr >> 26) & 0x7; - if (type == 5) return TextureFetch_Compressed(vramaddr, st_full, wrapmode); - else if (type == 2) return TextureFetch_I2 (vramaddr, st_full, wrapmode, alpha0); - else if (type == 3) return TextureFetch_I4 (vramaddr, st_full, wrapmode, alpha0); - else if (type == 4) return TextureFetch_I8 (vramaddr, st_full, wrapmode, alpha0); - else if (type == 1) return TextureFetch_A3I5 (vramaddr, st_full, wrapmode); - else if (type == 6) return TextureFetch_A5I3 (vramaddr, st_full, wrapmode); - else return TextureFetch_Direct (vramaddr, st_full, wrapmode); -} - -vec4 TextureLookup_Linear(vec2 texcoord) -{ - ivec2 intpart = ivec2(texcoord); - vec2 fracpart = fract(texcoord); - - uint attr = fPolygonAttr.y; - uint paladdr = fPolygonAttr.z; - - float alpha0; - if ((attr & (1<<29)) != 0) alpha0 = 0.0; - else alpha0 = 1.0; - - int tw = 8 << int((attr >> 20) & 0x7); - int th = 8 << int((attr >> 23) & 0x7); - ivec4 st_full = ivec4(intpart, tw, th); - - ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr)); - uint wrapmode = attr >> 16; - - vec4 A, B, C, D; - uint type = (attr >> 26) & 0x7; - if (type == 5) - { - A = TextureFetch_Compressed(vramaddr, st_full , wrapmode); - B = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_Compressed(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); - D = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - else if (type == 2) - { - A = TextureFetch_I2(vramaddr, st_full , wrapmode, alpha0); - B = TextureFetch_I2(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); - C = TextureFetch_I2(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); - D = TextureFetch_I2(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); - } - else if (type == 3) - { - A = TextureFetch_I4(vramaddr, st_full , wrapmode, alpha0); - B = TextureFetch_I4(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); - C = TextureFetch_I4(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); - D = TextureFetch_I4(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); - } - else if (type == 4) - { - A = TextureFetch_I8(vramaddr, st_full , wrapmode, alpha0); - B = TextureFetch_I8(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); - C = TextureFetch_I8(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); - D = TextureFetch_I8(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); - } - else if (type == 1) - { - A = TextureFetch_A3I5(vramaddr, st_full , wrapmode); - B = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_A3I5(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); - D = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - else if (type == 6) - { - A = TextureFetch_A5I3(vramaddr, st_full , wrapmode); - B = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_A5I3(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); - D = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - else - { - A = TextureFetch_Direct(vramaddr, st_full , wrapmode); - B = TextureFetch_Direct(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_Direct(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); - D = TextureFetch_Direct(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - - float fx = fracpart.x; - vec4 AB; - if (A.a < (0.5/31.0) && B.a < (0.5/31.0)) - AB = vec4(0); - else - { - //if (A.a < (0.5/31.0) || B.a < (0.5/31.0)) - // fx = step(0.5, fx); - - AB = mix(A, B, fx); - } - - fx = fracpart.x; - vec4 CD; - if (C.a < (0.5/31.0) && D.a < (0.5/31.0)) - CD = vec4(0); - else - { - //if (C.a < (0.5/31.0) || D.a < (0.5/31.0)) - // fx = step(0.5, fx); - - CD = mix(C, D, fx); - } - - fx = fracpart.y; - vec4 ret; - if (AB.a < (0.5/31.0) && CD.a < (0.5/31.0)) - ret = vec4(0); - else - { - //if (AB.a < (0.5/31.0) || CD.a < (0.5/31.0)) - // fx = step(0.5, fx); - - ret = mix(AB, CD, fx); - } - - return ret; -} - -vec4 FinalColor() -{ - vec4 col; - vec4 vcol = fColor; - uint blendmode = (fPolygonAttr.x >> 4) & 0x3; - - if (blendmode == 2) - { - if ((uDispCnt & (1<<1)) == 0) - { - // toon - vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; - vcol.rgb = tooncolor; - } - else - { - // highlight - vcol.rgb = vcol.rrr; - } - } - - if ((((fPolygonAttr.y >> 26) & 0x7) == 0) || ((uDispCnt & (1<<0)) == 0)) - { - // no texture - col = vcol; - } - else - { - vec4 tcol = TextureLookup_Nearest(fTexcoord); - //vec4 tcol = TextureLookup_Linear(fTexcoord); - - if ((blendmode & 1) != 0) - { - // decal - col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a)); - col.a = vcol.a; - } - else - { - // modulate - col = vcol * tcol; - } - } - - if (blendmode == 2) - { - if ((uDispCnt & (1<<1)) != 0) - { - vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; - col.rgb = min(col.rgb + tooncolor, 1.0); - } - } - - return col.bgra; -} -)"; - - -const char* kRenderVS_Z = R"( - -void main() -{ - uint attr = vPolygonAttr.x; - uint zshift = (attr >> 16) & 0x1F; - - vec4 fpos; - fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0; - fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0; - fpos.w = float(vPosition.w) / 65536.0f; - fpos.xyz *= fpos.w; - - fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); - fTexcoord = vec2(vTexcoord) / 16.0; - fPolygonAttr = vPolygonAttr; - - gl_Position = fpos; -} -)"; - -const char* kRenderVS_W = R"( - -smooth out float fZ; - -void main() -{ - uint attr = vPolygonAttr.x; - uint zshift = (attr >> 16) & 0x1F; - - vec4 fpos; - fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0; - fZ = float(vPosition.z << zshift) / 16777216.0; - fpos.w = float(vPosition.w) / 65536.0f; - fpos.xy *= fpos.w; - - fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); - fTexcoord = vec2(vTexcoord) / 16.0; - fPolygonAttr = vPolygonAttr; - - gl_Position = fpos; -} -)"; - - -const char* kRenderFS_ZO = R"( - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 30.5/31) discard; - - oColor = col; - oAttr.g = (fPolygonAttr.x >> 24) & 0x3F; -} -)"; - -const char* kRenderFS_WO = R"( - -smooth in float fZ; - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 30.5/31) discard; - - oColor = col; - oAttr.g = (fPolygonAttr.x >> 24) & 0x3F; - gl_FragDepth = fZ; -} -)"; - -const char* kRenderFS_ZT = R"( - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 0.5/31) discard; - if (col.a >= 30.5/31) discard; - - oColor = col; - oAttr.g = 0xFF; -} -)"; - -const char* kRenderFS_WT = R"( - -smooth in float fZ; - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 0.5/31) discard; - if (col.a >= 30.5/31) discard; - - oColor = col; - oAttr.g = 0xFF; - gl_FragDepth = fZ; -} -)"; - -const char* kRenderFS_ZSM = R"( - -void main() -{ - oColor = vec4(0,0,0,1); - oAttr.g = 0xFF; - oAttr.b = 1; -} -)"; - -const char* kRenderFS_WSM = R"( - -smooth in float fZ; - -void main() -{ - oColor = vec4(0,0,0,1); - oAttr.g = 0xFF; - oAttr.b = 1; - gl_FragDepth = fZ; -} -)"; - -const char* kRenderFS_ZS = R"( - -layout(binding=2) uniform usampler2D iAttrTex; -//layout(origin_upper_left) in vec4 gl_FragCoord; - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 0.5/31) discard; - if (col.a >= 30.5/31) discard; - - uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0); - if (iAttr.b != 1) discard; - if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard; - - oColor = col; -} -)"; - -const char* kRenderFS_WS = R"( - -layout(binding=2) uniform usampler2D iAttrTex; -//layout(origin_upper_left) in vec4 gl_FragCoord; - -smooth in float fZ; - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 0.5/31) discard; - if (col.a >= 30.5/31) discard; - - uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0); - if (iAttr.b != 1) discard; - if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard; - - oColor = col; - gl_FragDepth = fZ; -} -)"; - enum { @@ -721,6 +97,7 @@ GLuint TexMemID; GLuint TexPalMemID; int ScaleFactor; +bool Accelerated; int ScreenW, ScreenH; GLuint FramebufferTex[4]; @@ -968,9 +345,10 @@ void Reset() // } -void SetScale(int scale) +void SetDisplaySettings(int scale, bool accel) { ScaleFactor = scale; + Accelerated = accel; // TODO: antialiasing setting ScreenW = 256 << scale; @@ -986,10 +364,12 @@ void SetScale(int scale) glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW/2, ScreenH/2, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID); - glBufferData(GL_PIXEL_PACK_BUFFER, ScreenW*ScreenH*4, NULL, GL_DYNAMIC_READ); + if (accel) glBufferData(GL_PIXEL_PACK_BUFFER, ScreenW*ScreenH*4, NULL, GL_DYNAMIC_READ); + else glBufferData(GL_PIXEL_PACK_BUFFER, 256*192, NULL, GL_DYNAMIC_READ); if (Framebuffer) delete[] Framebuffer; - Framebuffer = new u32[ScreenW*ScreenH]; + if (accel) Framebuffer = new u32[256*192]; + else Framebuffer = new u32[ScreenW*ScreenH]; } @@ -1533,5 +913,10 @@ u32* GetLine(int line) return &Framebuffer[stride * line]; } +void SetupAccelFrame() +{ + // +} + } } diff --git a/src/GPU3D_OpenGL43_shaders.h b/src/GPU3D_OpenGL43_shaders.h new file mode 100644 index 00000000..8a69566b --- /dev/null +++ b/src/GPU3D_OpenGL43_shaders.h @@ -0,0 +1,645 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef GPU3D_OPENGL43_SHADERS_H +#define GPU3D_OPENGL43_SHADERS_H + +#define kShaderHeader "#version 430" + + +const char* kClearVS = kShaderHeader R"( + +layout(location=0) in vec2 vPosition; + +layout(location=1) uniform uint uDepth; + +void main() +{ + float fdepth = (float(uDepth) / 8388608.0) - 1.0; + gl_Position = vec4(vPosition, fdepth, 1.0); +} +)"; + +const char* kClearFS = kShaderHeader R"( + +layout(location=0) uniform uvec4 uColor; +layout(location=2) uniform uint uOpaquePolyID; +layout(location=3) uniform uint uFogFlag; + +layout(location=0) out vec4 oColor; +layout(location=1) out uvec3 oAttr; + +void main() +{ + oColor = vec4(uColor).bgra / 31.0; + oAttr.r = 0; + oAttr.g = uOpaquePolyID; + oAttr.b = 0; +} +)"; + + +const char* kRenderVSCommon = R"( + +layout(std140, binding=0) uniform uConfig +{ + vec2 uScreenSize; + uint uDispCnt; + vec4 uToonColors[32]; +}; + +layout(location=0) in uvec4 vPosition; +layout(location=1) in uvec4 vColor; +layout(location=2) in ivec2 vTexcoord; +layout(location=3) in uvec3 vPolygonAttr; + +smooth out vec4 fColor; +smooth out vec2 fTexcoord; +flat out uvec3 fPolygonAttr; +)"; + +const char* kRenderFSCommon = R"( + +layout(binding=0) uniform usampler2D TexMem; +layout(binding=1) uniform sampler2D TexPalMem; + +layout(std140, binding=0) uniform uConfig +{ + vec2 uScreenSize; + uint uDispCnt; + vec4 uToonColors[32]; +}; + +smooth in vec4 fColor; +smooth in vec2 fTexcoord; +flat in uvec3 fPolygonAttr; + +layout(location=0) out vec4 oColor; +layout(location=1) out uvec3 oAttr; + +int TexcoordWrap(int c, int maxc, uint mode) +{ + if ((mode & (1<<0)) != 0) + { + if ((mode & (1<<2)) != 0 && (c & maxc) != 0) + return (maxc-1) - (c & (maxc-1)); + else + return (c & (maxc-1)); + } + else + return clamp(c, 0, maxc-1); +} + +vec4 TextureFetch_A3I5(ivec2 addr, ivec4 st, uint wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x); + uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + + pixel.a = (pixel.r & 0xE0); + pixel.a = (pixel.a >> 3) + (pixel.a >> 6); + pixel.r &= 0x1F; + + addr.y = (addr.y << 3) + int(pixel.r); + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, float(pixel.a)/31.0); +} + +vec4 TextureFetch_I2(ivec2 addr, ivec4 st, uint wrapmode, float alpha0) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x) >> 2; + uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + pixel.r >>= (2 * (st.x & 3)); + pixel.r &= 0x03; + + addr.y = (addr.y << 2) + int(pixel.r); + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, max(step(1,pixel.r),alpha0)); +} + +vec4 TextureFetch_I4(ivec2 addr, ivec4 st, uint wrapmode, float alpha0) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x) >> 1; + uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + if ((st.x & 1) != 0) pixel.r >>= 4; + else pixel.r &= 0x0F; + + addr.y = (addr.y << 3) + int(pixel.r); + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, max(step(1,pixel.r),alpha0)); +} + +vec4 TextureFetch_I8(ivec2 addr, ivec4 st, uint wrapmode, float alpha0) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x); + uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + + addr.y = (addr.y << 3) + int(pixel.r); + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, max(step(1,pixel.r),alpha0)); +} + +vec4 TextureFetch_Compressed(ivec2 addr, ivec4 st, uint wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y & 0x3FC) * (st.z>>2)) + (st.x & 0x3FC) + (st.y & 0x3); + uvec4 p = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + uint val = (p.r >> (2 * (st.x & 0x3))) & 0x3; + + int slot1addr = 0x20000 + ((addr.x & 0x1FFFC) >> 1); + if (addr.x >= 0x40000) slot1addr += 0x10000; + + uint palinfo; + p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0); + palinfo = p.r; + slot1addr++; + p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0); + palinfo |= (p.r << 8); + + addr.y = (addr.y << 3) + ((int(palinfo) & 0x3FFF) << 1); + palinfo >>= 14; + + if (val == 0) + { + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + else if (val == 1) + { + addr.y++; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + else if (val == 2) + { + if (palinfo == 1) + { + vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + addr.y++; + vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4((color0.rgb + color1.rgb) / 2.0, 1.0); + } + else if (palinfo == 3) + { + vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + addr.y++; + vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4((color0.rgb*5.0 + color1.rgb*3.0) / 8.0, 1.0); + } + else + { + addr.y += 2; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + } + else + { + if (palinfo == 2) + { + addr.y += 3; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + else if (palinfo == 3) + { + vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + addr.y++; + vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4((color0.rgb*3.0 + color1.rgb*5.0) / 8.0, 1.0); + } + else + { + return vec4(0.0); + } + } +} + +vec4 TextureFetch_A5I3(ivec2 addr, ivec4 st, uint wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x); + uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + + pixel.a = (pixel.r & 0xF8) >> 3; + pixel.r &= 0x07; + + addr.y = (addr.y << 3) + int(pixel.r); + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, float(pixel.a)/31.0); +} + +vec4 TextureFetch_Direct(ivec2 addr, ivec4 st, uint wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x) << 1; + uvec4 pixelL = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + addr.x++; + uvec4 pixelH = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0); + + vec4 color; + color.r = float(pixelL.r & 0x1F) / 31.0; + color.g = float((pixelL.r >> 5) | ((pixelH.r & 0x03) << 3)) / 31.0; + color.b = float((pixelH.r & 0x7C) >> 2) / 31.0; + color.a = float(pixelH.r >> 7); + + return color; +} + +vec4 TextureLookup_Nearest(vec2 st) +{ + uint attr = fPolygonAttr.y; + uint paladdr = fPolygonAttr.z; + + float alpha0; + if ((attr & (1<<29)) != 0) alpha0 = 0.0; + else alpha0 = 1.0; + + int tw = 8 << int((attr >> 20) & 0x7); + int th = 8 << int((attr >> 23) & 0x7); + ivec4 st_full = ivec4(ivec2(st), tw, th); + + ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr)); + uint wrapmode = attr >> 16; + + uint type = (attr >> 26) & 0x7; + if (type == 5) return TextureFetch_Compressed(vramaddr, st_full, wrapmode); + else if (type == 2) return TextureFetch_I2 (vramaddr, st_full, wrapmode, alpha0); + else if (type == 3) return TextureFetch_I4 (vramaddr, st_full, wrapmode, alpha0); + else if (type == 4) return TextureFetch_I8 (vramaddr, st_full, wrapmode, alpha0); + else if (type == 1) return TextureFetch_A3I5 (vramaddr, st_full, wrapmode); + else if (type == 6) return TextureFetch_A5I3 (vramaddr, st_full, wrapmode); + else return TextureFetch_Direct (vramaddr, st_full, wrapmode); +} + +vec4 TextureLookup_Linear(vec2 texcoord) +{ + ivec2 intpart = ivec2(texcoord); + vec2 fracpart = fract(texcoord); + + uint attr = fPolygonAttr.y; + uint paladdr = fPolygonAttr.z; + + float alpha0; + if ((attr & (1<<29)) != 0) alpha0 = 0.0; + else alpha0 = 1.0; + + int tw = 8 << int((attr >> 20) & 0x7); + int th = 8 << int((attr >> 23) & 0x7); + ivec4 st_full = ivec4(intpart, tw, th); + + ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr)); + uint wrapmode = attr >> 16; + + vec4 A, B, C, D; + uint type = (attr >> 26) & 0x7; + if (type == 5) + { + A = TextureFetch_Compressed(vramaddr, st_full , wrapmode); + B = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_Compressed(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + else if (type == 2) + { + A = TextureFetch_I2(vramaddr, st_full , wrapmode, alpha0); + B = TextureFetch_I2(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); + C = TextureFetch_I2(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); + D = TextureFetch_I2(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); + } + else if (type == 3) + { + A = TextureFetch_I4(vramaddr, st_full , wrapmode, alpha0); + B = TextureFetch_I4(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); + C = TextureFetch_I4(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); + D = TextureFetch_I4(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); + } + else if (type == 4) + { + A = TextureFetch_I8(vramaddr, st_full , wrapmode, alpha0); + B = TextureFetch_I8(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); + C = TextureFetch_I8(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); + D = TextureFetch_I8(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); + } + else if (type == 1) + { + A = TextureFetch_A3I5(vramaddr, st_full , wrapmode); + B = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_A3I5(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + else if (type == 6) + { + A = TextureFetch_A5I3(vramaddr, st_full , wrapmode); + B = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_A5I3(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + else + { + A = TextureFetch_Direct(vramaddr, st_full , wrapmode); + B = TextureFetch_Direct(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_Direct(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_Direct(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + + float fx = fracpart.x; + vec4 AB; + if (A.a < (0.5/31.0) && B.a < (0.5/31.0)) + AB = vec4(0); + else + { + //if (A.a < (0.5/31.0) || B.a < (0.5/31.0)) + // fx = step(0.5, fx); + + AB = mix(A, B, fx); + } + + fx = fracpart.x; + vec4 CD; + if (C.a < (0.5/31.0) && D.a < (0.5/31.0)) + CD = vec4(0); + else + { + //if (C.a < (0.5/31.0) || D.a < (0.5/31.0)) + // fx = step(0.5, fx); + + CD = mix(C, D, fx); + } + + fx = fracpart.y; + vec4 ret; + if (AB.a < (0.5/31.0) && CD.a < (0.5/31.0)) + ret = vec4(0); + else + { + //if (AB.a < (0.5/31.0) || CD.a < (0.5/31.0)) + // fx = step(0.5, fx); + + ret = mix(AB, CD, fx); + } + + return ret; +} + +vec4 FinalColor() +{ + vec4 col; + vec4 vcol = fColor; + uint blendmode = (fPolygonAttr.x >> 4) & 0x3; + + if (blendmode == 2) + { + if ((uDispCnt & (1<<1)) == 0) + { + // toon + vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; + vcol.rgb = tooncolor; + } + else + { + // highlight + vcol.rgb = vcol.rrr; + } + } + + if ((((fPolygonAttr.y >> 26) & 0x7) == 0) || ((uDispCnt & (1<<0)) == 0)) + { + // no texture + col = vcol; + } + else + { + vec4 tcol = TextureLookup_Nearest(fTexcoord); + //vec4 tcol = TextureLookup_Linear(fTexcoord); + + if ((blendmode & 1) != 0) + { + // decal + col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a)); + col.a = vcol.a; + } + else + { + // modulate + col = vcol * tcol; + } + } + + if (blendmode == 2) + { + if ((uDispCnt & (1<<1)) != 0) + { + vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; + col.rgb = min(col.rgb + tooncolor, 1.0); + } + } + + return col.bgra; +} +)"; + + +const char* kRenderVS_Z = R"( + +void main() +{ + uint attr = vPolygonAttr.x; + uint zshift = (attr >> 16) & 0x1F; + + vec4 fpos; + fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0; + fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0; + fpos.w = float(vPosition.w) / 65536.0f; + fpos.xyz *= fpos.w; + + fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); + fTexcoord = vec2(vTexcoord) / 16.0; + fPolygonAttr = vPolygonAttr; + + gl_Position = fpos; +} +)"; + +const char* kRenderVS_W = R"( + +smooth out float fZ; + +void main() +{ + uint attr = vPolygonAttr.x; + uint zshift = (attr >> 16) & 0x1F; + + vec4 fpos; + fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0; + fZ = float(vPosition.z << zshift) / 16777216.0; + fpos.w = float(vPosition.w) / 65536.0f; + fpos.xy *= fpos.w; + + fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); + fTexcoord = vec2(vTexcoord) / 16.0; + fPolygonAttr = vPolygonAttr; + + gl_Position = fpos; +} +)"; + + +const char* kRenderFS_ZO = R"( + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 30.5/31) discard; + + oColor = col; + oAttr.g = (fPolygonAttr.x >> 24) & 0x3F; +} +)"; + +const char* kRenderFS_WO = R"( + +smooth in float fZ; + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 30.5/31) discard; + + oColor = col; + oAttr.g = (fPolygonAttr.x >> 24) & 0x3F; + gl_FragDepth = fZ; +} +)"; + +const char* kRenderFS_ZT = R"( + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 0.5/31) discard; + if (col.a >= 30.5/31) discard; + + oColor = col; + oAttr.g = 0xFF; +} +)"; + +const char* kRenderFS_WT = R"( + +smooth in float fZ; + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 0.5/31) discard; + if (col.a >= 30.5/31) discard; + + oColor = col; + oAttr.g = 0xFF; + gl_FragDepth = fZ; +} +)"; + +const char* kRenderFS_ZSM = R"( + +void main() +{ + oColor = vec4(0,0,0,1); + oAttr.g = 0xFF; + oAttr.b = 1; +} +)"; + +const char* kRenderFS_WSM = R"( + +smooth in float fZ; + +void main() +{ + oColor = vec4(0,0,0,1); + oAttr.g = 0xFF; + oAttr.b = 1; + gl_FragDepth = fZ; +} +)"; + +const char* kRenderFS_ZS = R"( + +layout(binding=2) uniform usampler2D iAttrTex; +//layout(origin_upper_left) in vec4 gl_FragCoord; + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 0.5/31) discard; + if (col.a >= 30.5/31) discard; + + uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0); + if (iAttr.b != 1) discard; + if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard; + + oColor = col; +} +)"; + +const char* kRenderFS_WS = R"( + +layout(binding=2) uniform usampler2D iAttrTex; +//layout(origin_upper_left) in vec4 gl_FragCoord; + +smooth in float fZ; + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 0.5/31) discard; + if (col.a >= 30.5/31) discard; + + uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0); + if (iAttr.b != 1) discard; + if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard; + + oColor = col; + gl_FragDepth = fZ; +} +)"; + +#endif // GPU3D_OPENGL43_SHADERS_H diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index ac5cd937..20283d34 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -138,7 +138,7 @@ void Reset() SetupRenderThread(); } -void SetScale(int scale) +void SetDisplaySettings(int scale, bool accel) { printf("SOFT RENDERER SCALE FACTOR: TODO!!!\n"); } @@ -2122,5 +2122,10 @@ u32* GetLine(int line) return &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset]; } +void SetupAccelFrame() +{ + // TODO +} + } } diff --git a/src/libui_sdl/main.cpp b/src/libui_sdl/main.cpp index 47f5f13f..b77d67f0 100644 --- a/src/libui_sdl/main.cpp +++ b/src/libui_sdl/main.cpp @@ -240,8 +240,8 @@ void GLDrawing_DrawScreen() x1 = TopScreenRect.X + TopScreenRect.Width; y1 = TopScreenRect.Y + TopScreenRect.Height; - scwidth = 256 << ScreenScale[0]; - scheight = 192 << ScreenScale[0]; + scwidth = 256;// << ScreenScale[0]; + scheight = 192;// << ScreenScale[0]; switch (ScreenRotation) { @@ -286,8 +286,8 @@ void GLDrawing_DrawScreen() x1 = BottomScreenRect.X + BottomScreenRect.Width; y1 = BottomScreenRect.Y + BottomScreenRect.Height; - scwidth = 256 << ScreenScale[1]; - scheight = 192 << ScreenScale[1]; + scwidth = 256;// << ScreenScale[1]; + scheight = 192;// << ScreenScale[1]; switch (ScreenRotation) { @@ -349,9 +349,13 @@ void GLDrawing_DrawScreen() int frontbuf = GPU::FrontBuffer; glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, GL_ScreenTexture); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256<