Render3D:

- By default, do not create a separate RGBA6665 buffer for rendering. Instead, directly render to GPUEngineA’s RGBA6665 buffer.
- SoftRasterizer no longer needs to flush the RGBA6665 buffer now that it is rendered to directly.
- Fix the OpenGL renderer’s RGBA5551 buffer flushing on big-endian systems.
This commit is contained in:
rogerman 2016-02-15 05:25:45 +00:00
parent 607c3687ca
commit f985f40ba8
4 changed files with 12 additions and 80 deletions

View File

@ -812,6 +812,8 @@ OpenGLRenderer::OpenGLRenderer()
OpenGLRenderer::~OpenGLRenderer()
{
free_aligned(_framebufferColor);
// Destroy OpenGL rendering states
delete ref;
ref = NULL;
@ -966,7 +968,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
(srcRGBA8888[ir].g >> 3) & 0x1F,
(srcRGBA8888[ir].r >> 3) & 0x1F) |
((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000);
((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000);
#else
dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color);
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
@ -1045,7 +1047,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
(srcRGBA8888[ir].g >> 3) & 0x1F,
(srcRGBA8888[ir].r >> 3) & 0x1F) |
((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000);
((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000);
#else
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
(srcRGBA8888[ir].g >> 3) & 0x1F,

View File

@ -2313,7 +2313,7 @@ void gfx3d_VBlankEndSignal(bool skipFrame)
if (!CommonSettings.showGpu.main)
{
memset(_gfx3d_colorRGBA6665, 0, GPU->GetCustomFramebufferWidth() * GPU->GetCustomFramebufferHeight() * sizeof(FragmentColor));
memset(GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), 0, GPU->GetCustomFramebufferWidth() * GPU->GetCustomFramebufferHeight() * sizeof(FragmentColor));
return;
}

View File

@ -1595,7 +1595,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarking(const u16 *colorTable, c
#define PIXOFFSET(dx,dy) ((dx)+(this->_framebufferWidth*(dy)))
#define ISEDGE(dx,dy) ((x+(dx) < this->_framebufferWidth) && (y+(dy) < this->_framebufferHeight) && polyID > this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET(dx,dy)])
#define DRAWEDGE(dx,dy) alphaBlend(_framebufferColor[i+PIXOFFSET(dx,dy)], edgeColor)
#define DRAWEDGE(dx,dy) alphaBlend(this->_framebufferColor[i+PIXOFFSET(dx,dy)], edgeColor)
bool upleft = ISEDGE(-1,-1);
bool up = ISEDGE( 0,-1);
@ -2022,16 +2022,13 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
return RENDER3DERROR_NOERR;
}
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor);
FragmentColor *oldFramebufferColor = this->_framebufferColor;
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
FragmentAttributesBuffer *oldFramebufferAttributes = this->_framebufferAttributes;
FragmentAttributesBuffer *newFramebufferAttributes = new FragmentAttributesBuffer(w * h);
this->_framebufferWidth = w;
this->_framebufferHeight = h;
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
this->_framebufferColor = newFramebufferColor;
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferRGBA6665();
this->_framebufferAttributes = newFramebufferAttributes;
if (rasterizerCores == 0 || rasterizerCores == 1)
@ -2050,7 +2047,6 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
}
}
free_aligned(oldFramebufferColor);
delete oldFramebufferAttributes;
return RENDER3DERROR_NOERR;

View File

@ -236,7 +236,7 @@ Render3D::Render3D()
Render3D::~Render3D()
{
free_aligned(_framebufferColor);
// Do nothing.
}
RendererID Render3D::GetRenderID()
@ -271,16 +271,10 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
return RENDER3DERROR_NOERR;
}
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor);
FragmentColor *oldFramebufferColor = this->_framebufferColor;
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
this->_framebufferWidth = w;
this->_framebufferHeight = h;
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
this->_framebufferColor = newFramebufferColor;
free_aligned(oldFramebufferColor);
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferRGBA6665();
return RENDER3DERROR_NOERR;
}
@ -324,11 +318,6 @@ Render3DError Render3D::EndRender(const u64 frameCount)
Render3DError Render3D::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
{
if (dstRGBA6665 != NULL)
{
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
}
if (dstRGBA5551 != NULL)
{
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
@ -539,62 +528,7 @@ Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
const size_t ssePixCount = pixCount - (pixCount % 4);
if ( (dstRGBA6665 != NULL) && (dstRGBA5551 != NULL) )
{
for (; i < ssePixCount; i += 4)
{
// Copy the framebufferColor buffer
__m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + i));
_mm_store_si128((__m128i *)(dstRGBA6665 + i), color);
// Convert to RGBA5551
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E)); // Read from R
r = _mm_srli_epi32(r, 1); // Shift to R
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00)); // Read from G
g = _mm_srli_epi32(g, 4); // Shift in G
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000)); // Read from B
b = _mm_srli_epi32(b, 7); // Shift to B
__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
a = _mm_cmpeq_epi32(a, zero_vec128); // Determine A
// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
// 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using
// packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is
// incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit
// pack), we wouldn't have to go through this hassle. But not everyone has an
// SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for
// everyone's CPU.
//
// To use packssdw, we take a bit one position lower for the alpha bit, run
// packssdw, then shift the bit back to its original position. Then we por the
// alpha vector with the post-packed color vector to get the final color.
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A
a = _mm_packs_epi32(a, zero_vec128); // Pack 32-bit down to 16-bit
a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be
// Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in.
color = _mm_or_si128(_mm_or_si128(r, g), b);
color = _mm_packs_epi32(color, zero_vec128);
color = _mm_or_si128(color, a);
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color);
}
for (; i < pixCount; i++)
{
dstRGBA6665[i] = this->_framebufferColor[i];
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
}
}
else if (dstRGBA6665 != NULL)
{
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
}
else
if (dstRGBA5551 != NULL)
{
for (; i < ssePixCount; i += 4)
{