From f985f40ba81e351a5a8603056e511a66f931a7b8 Mon Sep 17 00:00:00 2001 From: rogerman Date: Mon, 15 Feb 2016 05:25:45 +0000 Subject: [PATCH] =?UTF-8?q?Render3D:=20-=20By=20default,=20do=20not=20crea?= =?UTF-8?q?te=20a=20separate=20RGBA6665=20buffer=20for=20rendering.=20Inst?= =?UTF-8?q?ead,=20directly=20render=20to=20GPUEngineA=E2=80=99s=20RGBA6665?= =?UTF-8?q?=20buffer.=20-=20SoftRasterizer=20no=20longer=20needs=20to=20fl?= =?UTF-8?q?ush=20the=20RGBA6665=20buffer=20now=20that=20it=20is=20rendered?= =?UTF-8?q?=20to=20directly.=20-=20Fix=20the=20OpenGL=20renderer=E2=80=99s?= =?UTF-8?q?=20RGBA5551=20buffer=20flushing=20on=20big-endian=20systems.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- desmume/src/OGLRender.cpp | 6 ++-- desmume/src/gfx3d.cpp | 2 +- desmume/src/rasterize.cpp | 10 ++---- desmume/src/render3D.cpp | 74 +++------------------------------------ 4 files changed, 12 insertions(+), 80 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index ab674ec0f..e0b695671 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -812,6 +812,8 @@ OpenGLRenderer::OpenGLRenderer() OpenGLRenderer::~OpenGLRenderer() { + free_aligned(_framebufferColor); + // Destroy OpenGL rendering states delete ref; ref = NULL; @@ -966,7 +968,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, (srcRGBA8888[ir].g >> 3) & 0x1F, (srcRGBA8888[ir].r >> 3) & 0x1F) | - ((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000); + ((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000); #else dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color); dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, @@ -1045,7 +1047,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, (srcRGBA8888[ir].g >> 3) & 0x1F, (srcRGBA8888[ir].r >> 3) & 0x1F) | - ((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000); + ((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000); #else dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, (srcRGBA8888[ir].g >> 3) & 0x1F, diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index 39b8d7d4c..0d1d9629b 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -2313,7 +2313,7 @@ void gfx3d_VBlankEndSignal(bool skipFrame) if (!CommonSettings.showGpu.main) { - memset(_gfx3d_colorRGBA6665, 0, GPU->GetCustomFramebufferWidth() * GPU->GetCustomFramebufferHeight() * sizeof(FragmentColor)); + memset(GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), 0, GPU->GetCustomFramebufferWidth() * GPU->GetCustomFramebufferHeight() * sizeof(FragmentColor)); return; } diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index aab39031e..53b6bbab7 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -1595,7 +1595,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarking(const u16 *colorTable, c #define PIXOFFSET(dx,dy) ((dx)+(this->_framebufferWidth*(dy))) #define ISEDGE(dx,dy) ((x+(dx) < this->_framebufferWidth) && (y+(dy) < this->_framebufferHeight) && polyID > this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET(dx,dy)]) -#define DRAWEDGE(dx,dy) alphaBlend(_framebufferColor[i+PIXOFFSET(dx,dy)], edgeColor) +#define DRAWEDGE(dx,dy) alphaBlend(this->_framebufferColor[i+PIXOFFSET(dx,dy)], edgeColor) bool upleft = ISEDGE(-1,-1); bool up = ISEDGE( 0,-1); @@ -2022,16 +2022,13 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h) return RENDER3DERROR_NOERR; } - const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); - FragmentColor *oldFramebufferColor = this->_framebufferColor; - FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); FragmentAttributesBuffer *oldFramebufferAttributes = this->_framebufferAttributes; FragmentAttributesBuffer *newFramebufferAttributes = new FragmentAttributesBuffer(w * h); this->_framebufferWidth = w; this->_framebufferHeight = h; - this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes; - this->_framebufferColor = newFramebufferColor; + this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor); + this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferRGBA6665(); this->_framebufferAttributes = newFramebufferAttributes; if (rasterizerCores == 0 || rasterizerCores == 1) @@ -2050,7 +2047,6 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h) } } - free_aligned(oldFramebufferColor); delete oldFramebufferAttributes; return RENDER3DERROR_NOERR; diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index 2084cb5eb..f0659d13b 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -236,7 +236,7 @@ Render3D::Render3D() Render3D::~Render3D() { - free_aligned(_framebufferColor); + // Do nothing. } RendererID Render3D::GetRenderID() @@ -271,16 +271,10 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h) return RENDER3DERROR_NOERR; } - const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); - FragmentColor *oldFramebufferColor = this->_framebufferColor; - FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); - this->_framebufferWidth = w; this->_framebufferHeight = h; - this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes; - this->_framebufferColor = newFramebufferColor; - - free_aligned(oldFramebufferColor); + this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor); + this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferRGBA6665(); return RENDER3DERROR_NOERR; } @@ -324,11 +318,6 @@ Render3DError Render3D::EndRender(const u64 frameCount) Render3DError Render3D::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) { - if (dstRGBA6665 != NULL) - { - memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes); - } - if (dstRGBA5551 != NULL) { for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++) @@ -539,62 +528,7 @@ Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6 const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight; const size_t ssePixCount = pixCount - (pixCount % 4); - if ( (dstRGBA6665 != NULL) && (dstRGBA5551 != NULL) ) - { - for (; i < ssePixCount; i += 4) - { - // Copy the framebufferColor buffer - __m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + i)); - _mm_store_si128((__m128i *)(dstRGBA6665 + i), color); - - // Convert to RGBA5551 - __m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E)); // Read from R - r = _mm_srli_epi32(r, 1); // Shift to R - - __m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00)); // Read from G - g = _mm_srli_epi32(g, 4); // Shift in G - - __m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000)); // Read from B - b = _mm_srli_epi32(b, 7); // Shift to B - - __m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A - a = _mm_cmpeq_epi32(a, zero_vec128); // Determine A - - // From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned - // 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using - // packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is - // incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit - // pack), we wouldn't have to go through this hassle. But not everyone has an - // SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for - // everyone's CPU. - // - // To use packssdw, we take a bit one position lower for the alpha bit, run - // packssdw, then shift the bit back to its original position. Then we por the - // alpha vector with the post-packed color vector to get the final color. - - a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A - a = _mm_packs_epi32(a, zero_vec128); // Pack 32-bit down to 16-bit - a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be - - // Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in. - color = _mm_or_si128(_mm_or_si128(r, g), b); - color = _mm_packs_epi32(color, zero_vec128); - color = _mm_or_si128(color, a); - - _mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color); - } - - for (; i < pixCount; i++) - { - dstRGBA6665[i] = this->_framebufferColor[i]; - dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000); - } - } - else if (dstRGBA6665 != NULL) - { - memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes); - } - else + if (dstRGBA5551 != NULL) { for (; i < ssePixCount; i += 4) {