simpler GPU-compositing. will make it easier to grab neighbor 2D/3D pixels individually for filtering.

This commit is contained in:
Arisotura 2019-05-25 19:36:47 +02:00
parent 8c93a45574
commit 70a3243714
6 changed files with 266 additions and 287 deletions

View File

@ -152,7 +152,7 @@ void Reset()
VRAMMap_ARM7[1] = 0;
int fbsize;
if (Accelerated) fbsize = (256*3 + 2) * 192;
if (Accelerated) fbsize = (256*3 + 1) * 192;
else fbsize = 256 * 192;
for (int i = 0; i < fbsize; i++)
{
@ -177,7 +177,7 @@ void Reset()
void Stop()
{
int fbsize;
if (Accelerated) fbsize = (256*3 + 2) * 192;
if (Accelerated) fbsize = (256*3 + 1) * 192;
else fbsize = 256 * 192;
memset(Framebuffer[0][0], 0, fbsize*4);
memset(Framebuffer[0][1], 0, fbsize*4);
@ -257,7 +257,7 @@ void SetDisplaySettings(bool accel)
if (accel != Accelerated)
{
int fbsize;
if (accel) fbsize = (256*3 + 2) * 192;
if (accel) fbsize = (256*3 + 1) * 192;
else fbsize = 256 * 192;
if (Framebuffer[0][0]) delete[] Framebuffer[0][0];
if (Framebuffer[1][0]) delete[] Framebuffer[1][0];

View File

@ -617,10 +617,78 @@ u32 GPU2D::ColorBrightnessDown(u32 val, u32 factor)
return rb | g | 0xFF000000;
}
// Combines a top pixel with the pixel underneath it for one screen column,
// picking the color effect from BlendCnt, the pixels' flag bytes and the
// window mask.
//
// i    : column index; only used to look up WindowMask[i]
// val1 : top pixel, with its layer flags in bits 24-31
// val2 : pixel underneath, with its layer flags in bits 24-31
//
// Returns val1 untouched when no effect applies, otherwise the output of
// ColorBlend4 / ColorBlend5 / ColorBrightnessUp / ColorBrightnessDown.
u32 GPU2D::ColorComposite(int i, u32 val1, u32 val2)
{
    // 0 = none, 1 = blend with eva/evb, 2 = brightness up,
    // 3 = brightness down, 4 = 3D layer blend
    u32 coloreffect = 0;

    // zero-initialized so no path can ever pass indeterminate
    // coefficients on to ColorBlend4
    u32 eva = 0, evb = 0;

    u32 flag1 = val1 >> 24;
    u32 flag2 = val2 >> 24;

    // BlendCnt bit (in bits 8-12) that must be set for val2 to take part
    // in a blend
    u32 target2;
    if      (flag2 & 0x80) target2 = 0x1000;
    else if (flag2 & 0x40) target2 = 0x0100;
    else                   target2 = flag2 << 8;

    if ((flag1 & 0x80) && (BlendCnt & target2))
    {
        // sprite blending
        coloreffect = 1;

        if (flag1 & 0x40)
        {
            // the pixel carries its own coefficient in the low flag bits
            eva = flag1 & 0x1F;
            evb = 16 - eva;
        }
        else
        {
            eva = EVA;
            evb = EVB;
        }
    }
    else if ((flag1 & 0x40) && (BlendCnt & target2))
    {
        // 3D layer blending
        coloreffect = 4;
    }
    else
    {
        // translate the flag byte into the matching BlendCnt bit (bits 0-4)
        if      (flag1 & 0x80) flag1 = 0x10;
        else if (flag1 & 0x40) flag1 = 0x01;

        // the effect only applies if the top pixel's bit is set in BlendCnt
        // and WindowMask allows it for this column
        if ((BlendCnt & flag1) && (WindowMask[i] & 0x20))
        {
            coloreffect = (BlendCnt >> 6) & 0x3;

            if (coloreffect == 1)
            {
                // blending additionally needs val2's bit set in BlendCnt
                if (BlendCnt & target2)
                {
                    eva = EVA;
                    evb = EVB;
                }
                else
                    coloreffect = 0;
            }
        }
    }

    switch (coloreffect)
    {
    case 0: return val1;
    case 1: return ColorBlend4(val1, val2, eva, evb);
    case 2: return ColorBrightnessUp(val1, EVY);
    case 3: return ColorBrightnessDown(val1, EVY);
    case 4: return ColorBlend5(val1, val2);
    }

    // coloreffect is always 0..4 here; this return only guarantees that
    // control never flows off the end of a non-void function
    return val1;
}
void GPU2D::DrawScanline(u32 line)
{
int stride = Accelerated ? (256*3 + 2) : 256;
int stride = Accelerated ? (256*3 + 1) : 256;
u32* dst = &Framebuffer[stride * line];
int n3dline = line;
@ -648,7 +716,6 @@ void GPU2D::DrawScanline(u32 line)
if (Accelerated)
{
dst[256*3] = 0;
dst[256*3 + 1] = 0;
}
return;
}
@ -747,14 +814,7 @@ void GPU2D::DrawScanline(u32 line)
if (Accelerated)
{
u32 ctl = (BlendCnt & 0x3FFF);
ctl |= ((DispCnt & 0x30000) >> 2);
ctl |= (EVA << 16);
ctl |= (EVB << 21);
ctl |= (EVY << 26);
dst[256*3] = ctl;
dst[256*3 + 1] = MasterBrightness;
dst[256*3] = MasterBrightness | (DispCnt & 0x30000);
return;
}
@ -858,111 +918,57 @@ void GPU2D::DoCapture(u32 line, u32 width)
// but when doing display capture, we do need the composited output
// so we do it here
u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
for (int i = 0; i < 256; i++)
{
u32 val1 = BGOBJLine[i];
u32 val3 = BGOBJLine[256+i];
u32 val2 = BGOBJLine[512+i];
u32 val2 = BGOBJLine[256+i];
u32 val3 = BGOBJLine[512+i];
if ((val1 >> 30) == 1)
{
u32 _3dval = _3DLine[val1 & 0xFF];
if ((_3dval >> 24) > 0)
{
val1 = _3dval | 0x40000000;
val2 = val3;
}
else
val1 = val3;
}
else if ((val3 >> 30) == 1)
u32 compmode = (val3 >> 24) & 0xF;
if (compmode == 4)
{
// 3D on top, blending
u32 _3dval = _3DLine[val3 & 0xFF];
if ((_3dval >> 24) > 0)
val2 = _3dval | 0x40000000;
val1 = ColorBlend5(_3dval, val1);
else
val1 = val2;
}
else
val2 = val3;
val1 &= ~0x00800000;
val2 &= ~0x00800000;
u32 coloreffect, eva, evb;
u32 flag1 = val1 >> 24;
u32 flag2 = val2 >> 24;
u32 target2;
if (flag2 & 0x80) target2 = 0x1000;
else if (flag2 & 0x40) target2 = 0x0100;
else target2 = flag2 << 8;
if ((flag1 & 0x80) && (BlendCnt & target2))
else if (compmode == 1)
{
// sprite blending
// 3D on bottom, blending
coloreffect = 1;
if (flag1 & 0x40)
u32 _3dval = _3DLine[val3 & 0xFF];
if ((_3dval >> 24) > 0)
{
eva = flag1 & 0x1F;
evb = 16 - eva;
u32 eva = (val3 >> 8) & 0x1F;
u32 evb = (val3 >> 16) & 0x1F;
val1 = ColorBlend4(val1, _3dval, eva, evb);
}
else
{
eva = EVA;
evb = EVB;
}
val1 = val2;
}
else if ((flag1 & 0x40) && (BlendCnt & target2))
else if (compmode <= 3)
{
// 3D layer blending
// 3D on top, normal/fade
BGOBJLine[i] = ColorBlend5(val1, val2);
continue;
}
else
{
if (flag1 & 0x80) flag1 = 0x10;
else if (flag1 & 0x40) flag1 = 0x01;
if ((BlendCnt & flag1) && (WindowMask[i] & 0x20))
u32 _3dval = _3DLine[val3 & 0xFF];
if ((_3dval >> 24) > 0)
{
if ((bldcnteffect == 1) && (BlendCnt & target2))
{
coloreffect = 1;
eva = EVA;
evb = EVB;
}
else if (bldcnteffect >= 2)
coloreffect = bldcnteffect;
else
coloreffect = 0;
u32 evy = (val3 >> 8) & 0x1F;
val1 = _3dval;
if (compmode == 2) val1 = ColorBrightnessUp(val1, evy);
else if (compmode == 3) val1 = ColorBrightnessDown(val1, evy);
}
else
coloreffect = 0;
val1 = val2;
}
switch (coloreffect)
{
case 0:
BGOBJLine[i] = val1;
break;
case 1:
BGOBJLine[i] = ColorBlend4(val1, val2, eva, evb);
break;
case 2:
BGOBJLine[i] = ColorBrightnessUp(val1, EVY);
break;
case 3:
BGOBJLine[i] = ColorBrightnessDown(val1, EVY);
break;
}
BGOBJLine[i] = val1;
}
}
}
@ -1374,94 +1380,102 @@ void GPU2D::DrawScanline_BGOBJ(u32 line)
if (!Accelerated)
{
u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
for (int i = 0; i < 256; i++)
{
u32 val1 = BGOBJLine[i];
u32 val2 = BGOBJLine[256+i];
u32 coloreffect, eva, evb;
u32 flag1 = val1 >> 24;
u32 flag2 = val2 >> 24;
u32 target2;
if (flag2 & 0x80) target2 = 0x1000;
else if (flag2 & 0x40) target2 = 0x0100;
else target2 = flag2 << 8;
if ((flag1 & 0x80) && (BlendCnt & target2))
{
// sprite blending
coloreffect = 1;
if (flag1 & 0x40)
{
eva = flag1 & 0x1F;
evb = 16 - eva;
}
else
{
eva = EVA;
evb = EVB;
}
}
else if ((flag1 & 0x40) && (BlendCnt & target2))
{
// 3D layer blending
BGOBJLine[i] = ColorBlend5(val1, val2);
continue;
}
else
{
if (flag1 & 0x80) flag1 = 0x10;
else if (flag1 & 0x40) flag1 = 0x01;
if ((BlendCnt & flag1) && (WindowMask[i] & 0x20))
{
if ((bldcnteffect == 1) && (BlendCnt & target2))
{
coloreffect = 1;
eva = EVA;
evb = EVB;
}
else if (bldcnteffect >= 2)
coloreffect = bldcnteffect;
else
coloreffect = 0;
}
else
coloreffect = 0;
}
switch (coloreffect)
{
case 0:
BGOBJLine[i] = val1;
break;
case 1:
BGOBJLine[i] = ColorBlend4(val1, val2, eva, evb);
break;
case 2:
BGOBJLine[i] = ColorBrightnessUp(val1, EVY);
break;
case 3:
BGOBJLine[i] = ColorBrightnessDown(val1, EVY);
break;
}
BGOBJLine[i] = ColorComposite(i, val1, val2);
}
}
else
{
for (int i = 0; i < 256; i++)
if (Num == 0)
{
BGOBJLine[i] |= ((WindowMask[i] & 0x20) << 18);
for (int i = 0; i < 256; i++)
{
u32 val1 = BGOBJLine[i];
u32 val2 = BGOBJLine[256+i];
u32 val3 = BGOBJLine[512+i];
u32 flag1 = val1 >> 24;
u32 flag2 = val2 >> 24;
u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
u32 target1;
if (flag1 & 0x80) target1 = 0x0010;
else if (flag1 & 0x40) target1 = 0x0001;
else target1 = flag1;
u32 target2;
if (flag2 & 0x80) target2 = 0x1000;
else if (flag2 & 0x40) target2 = 0x0100;
else target2 = flag2 << 8;
if (((flag1 & 0xC0) == 0x40) && (BlendCnt & target2))
{
// 3D on top, blending
BGOBJLine[i] = val2;
BGOBJLine[256+i] = ColorComposite(i, val2, val3);
BGOBJLine[512+i] = 0x04000000 | (val1 & 0xFF);
}
else if ((flag1 & 0xC0) == 0x40)
{
// 3D on top, normal/fade
if (bldcnteffect == 1) bldcnteffect = 0;
if (!(BlendCnt & 0x0001)) bldcnteffect = 0;
if (!(WindowMask[i] & 0x20)) bldcnteffect = 0;
BGOBJLine[i] = val2;
BGOBJLine[256+i] = ColorComposite(i, val2, val3);
BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8) | (val1 & 0xFF);
}
else if (((flag2 & 0xC0) == 0x40) && ((BlendCnt & 0x01C0) == 0x0140))
{
// 3D on bottom, blending
u32 eva, evb;
if ((flag1 & 0xC0) == 0xC0)
{
eva = flag1 & 0x1F;
evb = 16 - eva;
}
else if ((BlendCnt & target1) && (WindowMask[i] & 0x20))
{
eva = EVA;
evb = EVB;
}
else
bldcnteffect = 7;
BGOBJLine[i] = val1;
BGOBJLine[256+i] = ColorComposite(i, val1, val3);
BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8) | (val1 & 0xFF);
}
else
{
// no potential 3D pixel involved
BGOBJLine[i] = ColorComposite(i, val1, val2);
BGOBJLine[256+i] = 0;
BGOBJLine[512+i] = 0x07000000;
}
}
}
else
{
for (int i = 0; i < 256; i++)
{
u32 val1 = BGOBJLine[i];
u32 val2 = BGOBJLine[256+i];
BGOBJLine[i] = ColorComposite(i, val1, val2);
BGOBJLine[256+i] = 0;
BGOBJLine[512+i] = 0x07000000;
}
}
}

View File

@ -127,6 +127,7 @@ private:
u32 ColorBlend5(u32 val1, u32 val2);
u32 ColorBrightnessUp(u32 val, u32 factor);
u32 ColorBrightnessDown(u32 val, u32 factor);
u32 ColorComposite(int i, u32 val1, u32 val2);
template<u32 bgmode> void DrawScanlineBGMode(u32 line, u32 nsprites);
void DrawScanlineBGMode6(u32 line, u32 nsprites);

View File

@ -890,6 +890,8 @@ void RenderSceneChunk(int y, int h)
glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID);
glBindVertexArray(ClearVertexArrayID);
glDrawArrays(GL_TRIANGLES, 0, 2*3);
glFlush();
}
}

View File

@ -216,7 +216,7 @@ bool GLScreen_Init()
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 1024, 1536, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 256*3 + 1, 192*2, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, NULL);
GL_ScreenSizeDirty = true;
@ -317,31 +317,31 @@ void GLScreen_DrawScreen()
switch (ScreenRotation)
{
case 0:
s0 = 0; t0 = 768;
s1 = scwidth; t1 = 768;
s2 = 0; t2 = 768+scheight;
s3 = scwidth; t3 = 768+scheight;
s0 = 0; t0 = 192;
s1 = scwidth; t1 = 192;
s2 = 0; t2 = 192+scheight;
s3 = scwidth; t3 = 192+scheight;
break;
case 1:
s0 = 0; t0 = 768+scheight;
s1 = 0; t1 = 768;
s2 = scwidth; t2 = 768+scheight;
s3 = scwidth; t3 = 768;
s0 = 0; t0 = 192+scheight;
s1 = 0; t1 = 192;
s2 = scwidth; t2 = 192+scheight;
s3 = scwidth; t3 = 192;
break;
case 2:
s0 = scwidth; t0 = 768+scheight;
s1 = 0; t1 = 768+scheight;
s2 = scwidth; t2 = 768;
s3 = 0; t3 = 768;
s0 = scwidth; t0 = 192+scheight;
s1 = 0; t1 = 192+scheight;
s2 = scwidth; t2 = 192;
s3 = 0; t3 = 192;
break;
case 3:
s0 = scwidth; t0 = 768;
s1 = scwidth; t1 = 768+scheight;
s2 = 0; t2 = 768;
s3 = 0; t3 = 768+scheight;
s0 = scwidth; t0 = 192;
s1 = scwidth; t1 = 192+scheight;
s2 = 0; t2 = 192;
s3 = 0; t3 = 192+scheight;
break;
}
@ -381,14 +381,14 @@ void GLScreen_DrawScreen()
{
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA_INTEGER,
GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 768, 256, 192, GL_RGBA_INTEGER,
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 192, GL_RGBA_INTEGER,
GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]);
}
else
{
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 2, 192, GL_RGBA_INTEGER,
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 1, 192, GL_RGBA_INTEGER,
GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 768, 256*3 + 2, 192, GL_RGBA_INTEGER,
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256*3 + 1, 192, GL_RGBA_INTEGER,
GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]);
}
}
@ -2241,11 +2241,13 @@ void DestroyMainWindow()
void RecreateMainWindow(bool opengl)
{
int winX, winY;
int winX, winY, maxi;
uiWindowPosition(MainWindow, &winX, &winY);
maxi = uiWindowMaximized(MainWindow);
DestroyMainWindow();
CreateMainWindow(opengl);
uiWindowSetPosition(MainWindow, winX, winY);
uiWindowSetMaximized(MainWindow, maxi);
}

View File

@ -66,121 +66,81 @@ void main()
{
ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0));
// bit0-13: BLDCNT
// bit14-15: DISPCNT display mode
// bit16-20: EVA
// bit21-25: EVB
// bit26-30: EVY
ivec4 ctl = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0));
ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3 + 1, int(fTexcoord.y)), 0));
int dispmode = (ctl.g >> 6) & 0x3;
ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0));
int dispmode = mbright.b & 0x3;
if (dispmode == 1)
{
int eva = ctl.b & 0x1F;
int evb = (ctl.b >> 5) | ((ctl.a & 0x03) << 3);
int evy = ctl.a >> 2;
ivec4 val1 = pixel;
ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0));
ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0));
ivec4 top = pixel;
ivec4 mid = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0));
ivec4 bot = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0));
int compmode = val3.a & 0xF;
int eva, evb, evy;
int winmask = top.b >> 7;
if ((top.a & 0xC0) == 0x40)
if (compmode == 4)
{
float xpos = top.r + fract(fTexcoord.x);
float ypos = mod(fTexcoord.y, 768);
// 3D on top, blending
float xpos = val3.r + fract(fTexcoord.x);
float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31));
if (_3dpix.a > 0) { top = _3dpix; top.a |= 0x40; bot = mid; }
else top = mid;
if (_3dpix.a > 0)
{
eva = (_3dpix.a & 0x1F) + 1;
evb = 32 - eva;
val1 = ((_3dpix * eva) + (val1 * evb)) >> 5;
if (eva <= 16) val1 += ivec4(1,1,1,0);
val1 = min(val1, 0x3F);
}
else
val1 = val2;
}
else if ((mid.a & 0xC0) == 0x40)
else if (compmode == 1)
{
float xpos = mid.r + fract(fTexcoord.x);
float ypos = mod(fTexcoord.y, 768);
// 3D on bottom, blending
float xpos = val3.r + fract(fTexcoord.x);
float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31));
if (_3dpix.a > 0) { bot = _3dpix; bot.a |= 0x40; }
}
else
{
// conditional texture fetch no good for performance, apparently
//texelFetch(_3DTex, ivec2(0, fTexcoord.y*2), 0);
bot = mid;
}
top.b &= 0x3F;
bot.b &= 0x3F;
int target2;
if ((bot.a & 0x80) != 0) target2 = 0x10;
else if ((bot.a & 0x40) != 0) target2 = 0x01;
else target2 = bot.a;
bool t2pass = ((ctl.g & target2) != 0);
int coloreffect = 0;
if ((top.a & 0x80) != 0 && t2pass)
{
// sprite blending
coloreffect = 1;
if ((top.a & 0x40) != 0)
if (_3dpix.a > 0)
{
eva = top.a & 0x1F;
evb = 16 - eva;
eva = val3.g;
evb = val3.b;
val1 = ((val1 * eva) + (_3dpix * evb)) >> 4;
val1 = min(val1, 0x3F);
}
else
val1 = val2;
}
else if ((top.a & 0x40) != 0 && t2pass)
else if (compmode <= 3)
{
// 3D layer blending
// 3D on top, normal/fade
coloreffect = 4;
eva = (top.a & 0x1F) + 1;
evb = 32 - eva;
}
else
{
if ((top.a & 0x80) != 0) top.a = 0x10;
else if ((top.a & 0x40) != 0) top.a = 0x01;
float xpos = val3.r + fract(fTexcoord.x);
float ypos = mod(fTexcoord.y, 192);
ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra
* vec4(63,63,63,31));
if ((ctl.r & top.a) != 0 && winmask != 0)
if (_3dpix.a > 0)
{
int effect = ctl.r >> 6;
if ((effect != 1) || t2pass) coloreffect = effect;
evy = val3.g;
val1 = _3dpix;
if (compmode == 2) val1 += ((ivec4(0x3F,0x3F,0x3F,0) - val1) * evy) >> 4;
else if (compmode == 3) val1 -= (val1 * evy) >> 4;
}
else
val1 = val2;
}
if (coloreffect == 0)
{
pixel = top;
}
else if (coloreffect == 1)
{
pixel = ((top * eva) + (bot * evb)) >> 4;
pixel = min(pixel, 0x3F);
}
else if (coloreffect == 2)
{
pixel = top;
pixel += ((ivec4(0x3F,0x3F,0x3F,0) - pixel) * evy) >> 4;
}
else if (coloreffect == 3)
{
pixel = top;
pixel -= (pixel * evy) >> 4;
}
else
{
pixel = ((top * eva) + (bot * evb)) >> 5;
if (eva <= 16) pixel += ivec4(1,1,1,0);
pixel = min(pixel, 0x3F);
}
pixel = val1;
}
if (dispmode != 0)