Render3D:
- Provide a means of specifying which specific framebuffers need to be flushed for each frame.
This commit is contained in:
parent
ed1879dc8f
commit
f68df5b976
|
@ -4089,6 +4089,18 @@ void GPUEngineA::SetCustomFramebufferSize(size_t w, size_t h)
|
|||
free_aligned(oldColorRGBA5551Buffer);
|
||||
}
|
||||
|
||||
|
||||
bool GPUEngineA::Is3DRenderedLayerNeeded()
|
||||
{
|
||||
return ( this->_enableLayer[GPULayerID_BG0] && (this->_IORegisterMap->DISPCNT.BG0_3D != 0) );
|
||||
}
|
||||
|
||||
bool GPUEngineA::Is3DCapturingNeeded()
|
||||
{
|
||||
const IOREG_DISPCAPCNT &DISPCAPCNT = this->_IORegisterMap->DISPCAPCNT;
|
||||
return ( (DISPCAPCNT.CaptureEnable != 0) && (vramConfiguration.banks[DISPCAPCNT.VRAMWriteBlock].purpose == VramConfiguration::LCDC) && (DISPCAPCNT.SrcA != 0) );
|
||||
}
|
||||
|
||||
template<bool ISCUSTOMRENDERINGNEEDED>
|
||||
void GPUEngineA::RenderLine(const u16 l)
|
||||
{
|
||||
|
@ -5681,7 +5693,10 @@ void GPUSubsystem::RenderLine(const u16 l, bool isFrameSkipRequested)
|
|||
{
|
||||
if (l == 0)
|
||||
{
|
||||
CurrentRenderer->SetFramebufferFlushStates(this->_engineMain->Is3DRenderedLayerNeeded(), this->_engineMain->Is3DCapturingNeeded());
|
||||
CurrentRenderer->RenderFinish();
|
||||
CurrentRenderer->SetFramebufferFlushStates(true, true);
|
||||
|
||||
this->_event->DidFrameBegin();
|
||||
this->UpdateVRAM3DUsageProperties();
|
||||
|
||||
|
|
|
@ -1403,6 +1403,9 @@ public:
|
|||
FragmentColor* Get3DFramebufferRGBA6665() const;
|
||||
u16* Get3DFramebufferRGBA5551() const;
|
||||
virtual void SetCustomFramebufferSize(size_t w, size_t h);
|
||||
|
||||
bool Is3DRenderedLayerNeeded();
|
||||
bool Is3DCapturingNeeded();
|
||||
|
||||
template<bool ISCUSTOMRENDERINGNEEDED> void RenderLine(const u16 l);
|
||||
void FramebufferPostprocess();
|
||||
|
|
|
@ -895,7 +895,7 @@ void OpenGLRenderer::SetVersion(unsigned int major, unsigned int minor, unsigned
|
|||
|
||||
Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict srcRGBA8888, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||
{
|
||||
if (srcRGBA8888 == NULL)
|
||||
if ( srcRGBA8888 == NULL || ((dstRGBA6665 == NULL) && (dstRGBA5551 == NULL)) )
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
@ -907,69 +907,152 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
|
|||
const size_t pixCount = this->_framebufferWidth;
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
if ( (dstRGBA6665 != NULL) && (dstRGBA5551 != NULL) )
|
||||
{
|
||||
size_t x = 0;
|
||||
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
|
||||
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
{
|
||||
// Convert to RGBA6665
|
||||
__m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||
color = _mm_srli_epi32(color, 2);
|
||||
|
||||
__m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha
|
||||
a = _mm_and_si128(a, _mm_set1_epi32(0x1F000000));
|
||||
|
||||
color = _mm_and_si128(color, _mm_set1_epi32(0x003F3F3F));
|
||||
color = _mm_or_si128(color, a);
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA
|
||||
_mm_store_si128((__m128i *)(dstRGBA6665 + iw), color);
|
||||
|
||||
// Convert to RGBA5551
|
||||
color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||
|
||||
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8)); // Read from R
|
||||
b = _mm_slli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800)); // Read from G
|
||||
g = _mm_srli_epi32(g, 6); // Shift in G
|
||||
|
||||
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000)); // Read from B
|
||||
r = _mm_srli_epi32(r, 19); // Shift to R
|
||||
|
||||
a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A
|
||||
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A
|
||||
|
||||
color = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a);
|
||||
|
||||
// All the colors are currently placed on 32 bit boundaries, so we need to swizzle them
|
||||
// to the lower 64 bits of our vector before we store them back to memory.
|
||||
// Note: Do not attempt to use packssdw here since packing with the 0x8000 bit set will
|
||||
// result in values of 0x7FFF, which are incorrect values in this case.
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color);
|
||||
}
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
{
|
||||
__m128i color;
|
||||
|
||||
// Convert to RGBA6665
|
||||
color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||
color = _mm_srli_epi32(color, 2);
|
||||
|
||||
__m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha
|
||||
a = _mm_and_si128(a, _mm_set1_epi32(0x1F000000));
|
||||
|
||||
color = _mm_and_si128(color, _mm_set1_epi32(0x003F3F3F));
|
||||
color = _mm_or_si128(color, a);
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA
|
||||
_mm_store_si128((__m128i *)(dstRGBA6665 + iw), color);
|
||||
|
||||
// Convert to RGBA5551
|
||||
color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||
|
||||
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8)); // Read from R
|
||||
b = _mm_slli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800)); // Read from G
|
||||
g = _mm_srli_epi32(g, 6); // Shift in G
|
||||
|
||||
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000)); // Read from B
|
||||
r = _mm_srli_epi32(r, 19); // Shift to R
|
||||
|
||||
a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A
|
||||
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A
|
||||
|
||||
color = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a);
|
||||
|
||||
// All the colors are currently placed on 32 bit boundaries, so we need to swizzle them
|
||||
// to the lower 64 bits of our vector before we store them back to memory.
|
||||
// Note: Do not attempt to use packssdw here since packing with the 0x8000 bit set will
|
||||
// result in values of 0x7FFF, which are incorrect values in this case.
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color);
|
||||
}
|
||||
#endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
// Use the correct endian format since OpenGL uses the native endian of
|
||||
// the architecture it is running on.
|
||||
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
// Use the correct endian format since OpenGL uses the native endian of
|
||||
// the architecture it is running on.
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcRGBA8888[ir].color);
|
||||
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].g >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].r >> 3) & 0x1F) |
|
||||
((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000);
|
||||
dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcRGBA8888[ir].color);
|
||||
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].g >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].r >> 3) & 0x1F) |
|
||||
((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000);
|
||||
#else
|
||||
dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color);
|
||||
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].g >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].r >> 3) & 0x1F) |
|
||||
((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000);
|
||||
dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color);
|
||||
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].g >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].r >> 3) & 0x1F) |
|
||||
((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (dstRGBA6665 != NULL)
|
||||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
|
||||
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
{
|
||||
__m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||
color = _mm_srli_epi32(color, 2);
|
||||
|
||||
__m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha
|
||||
a = _mm_and_si128(a, _mm_set1_epi32(0x1F000000));
|
||||
|
||||
color = _mm_and_si128(color, _mm_set1_epi32(0x003F3F3F));
|
||||
color = _mm_or_si128(color, a);
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA
|
||||
_mm_store_si128((__m128i *)(dstRGBA6665 + iw), color);
|
||||
}
|
||||
#endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcRGBA8888[ir].color);
|
||||
#else
|
||||
dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
size_t x = 0;
|
||||
|
||||
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
for (; x < ssePixCount; x += 4, ir += 4, iw += 4)
|
||||
{
|
||||
__m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir));
|
||||
|
||||
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8)); // Read from R
|
||||
b = _mm_slli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800)); // Read from G
|
||||
g = _mm_srli_epi32(g, 6); // Shift in G
|
||||
|
||||
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000)); // Read from B
|
||||
r = _mm_srli_epi32(r, 19); // Shift to R
|
||||
|
||||
__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A
|
||||
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A
|
||||
|
||||
color = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a);
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color);
|
||||
}
|
||||
#endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
|
||||
for (; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
#ifdef LOCAL_BE
|
||||
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].g >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].r >> 3) & 0x1F) |
|
||||
((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000);
|
||||
#else
|
||||
dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].g >> 3) & 0x1F,
|
||||
(srcRGBA8888[ir].r >> 3) & 0x1F) |
|
||||
((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2736,28 +2819,34 @@ Render3DError OpenGLRenderer_1_2::RenderFinish()
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
if(!BEGINGL())
|
||||
{
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||
|
||||
if (this->isPBOSupported)
|
||||
if ( (framebufferRGBA6665 != NULL) || (framebufferRGBA5551 != NULL) )
|
||||
{
|
||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
|
||||
if (mappedBufferPtr != NULL)
|
||||
if(!BEGINGL())
|
||||
{
|
||||
this->FlushFramebuffer(mappedBufferPtr, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
||||
glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB);
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
|
||||
if (mappedBufferPtr != NULL)
|
||||
{
|
||||
this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551);
|
||||
glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
||||
this->FlushFramebuffer(this->_framebufferColor, framebufferRGBA6665, framebufferRGBA5551);
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
}
|
||||
else
|
||||
{
|
||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
||||
this->FlushFramebuffer(this->_framebufferColor, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
|
||||
this->_pixelReadNeedsFinish = false;
|
||||
|
||||
|
@ -3314,28 +3403,34 @@ Render3DError OpenGLRenderer_1_5::RenderFinish()
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
if(!BEGINGL())
|
||||
{
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||
|
||||
if (this->isPBOSupported)
|
||||
if ( (framebufferRGBA6665 != NULL) || (framebufferRGBA5551 != NULL) )
|
||||
{
|
||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY);
|
||||
if (mappedBufferPtr != NULL)
|
||||
if(!BEGINGL())
|
||||
{
|
||||
this->FlushFramebuffer(mappedBufferPtr, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB);
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY);
|
||||
if (mappedBufferPtr != NULL)
|
||||
{
|
||||
this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551);
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
||||
this->FlushFramebuffer(this->_framebufferColor, framebufferRGBA6665, framebufferRGBA5551);
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
}
|
||||
else
|
||||
{
|
||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor);
|
||||
this->FlushFramebuffer(this->_framebufferColor, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
|
||||
this->_pixelReadNeedsFinish = false;
|
||||
|
||||
|
@ -4107,21 +4202,27 @@ Render3DError OpenGLRenderer_2_1::RenderFinish()
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
if(!BEGINGL())
|
||||
{
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||
|
||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||
if (mappedBufferPtr != NULL)
|
||||
if ( (framebufferRGBA6665 != NULL) || (framebufferRGBA5551 != NULL) )
|
||||
{
|
||||
this->FlushFramebuffer(mappedBufferPtr, GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
if(!BEGINGL())
|
||||
{
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
|
||||
const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||
if (mappedBufferPtr != NULL)
|
||||
{
|
||||
this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551);
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
|
||||
this->_pixelReadNeedsFinish = false;
|
||||
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
|
|
|
@ -1961,7 +1961,9 @@ Render3DError SoftRasterizerRenderer::EndRender(const u64 frameCount)
|
|||
this->RenderEdgeMarkingAndFog(this->postprocessParam[0]);
|
||||
}
|
||||
|
||||
this->FlushFramebuffer(GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
||||
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||
this->FlushFramebuffer(framebufferRGBA6665, framebufferRGBA5551);
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
@ -2005,7 +2007,9 @@ Render3DError SoftRasterizerRenderer::RenderFinish()
|
|||
}
|
||||
}
|
||||
|
||||
this->FlushFramebuffer(GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551());
|
||||
FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL;
|
||||
u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL;
|
||||
this->FlushFramebuffer(framebufferRGBA6665, framebufferRGBA5551);
|
||||
|
||||
GPU->GetEventHandler()->DidRender3DEnd();
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
|
|
@ -228,6 +228,9 @@ Render3D::Render3D()
|
|||
_framebufferColorSizeBytes = 0;
|
||||
_framebufferColor = NULL;
|
||||
|
||||
_willFlushFramebufferRGBA6665 = true;
|
||||
_willFlushFramebufferRGBA5551 = true;
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
@ -282,6 +285,18 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
void Render3D::GetFramebufferFlushStates(bool &willFlushRGBA6665, bool &willFlushRGBA5551)
|
||||
{
|
||||
willFlushRGBA6665 = this->_willFlushFramebufferRGBA6665;
|
||||
willFlushRGBA5551 = this->_willFlushFramebufferRGBA5551;
|
||||
}
|
||||
|
||||
void Render3D::SetFramebufferFlushStates(bool willFlushRGBA6665, bool willFlushRGBA5551)
|
||||
{
|
||||
this->_willFlushFramebufferRGBA6665 = willFlushRGBA6665;
|
||||
this->_willFlushFramebufferRGBA5551 = willFlushRGBA5551;
|
||||
}
|
||||
|
||||
Render3DError Render3D::BeginRender(const GFX3D &engine)
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
@ -309,12 +324,17 @@ Render3DError Render3D::EndRender(const u64 frameCount)
|
|||
|
||||
Render3DError Render3D::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||
{
|
||||
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
|
||||
// Convert to RGBA5551
|
||||
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
|
||||
if (dstRGBA6665 != NULL)
|
||||
{
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
}
|
||||
|
||||
if (dstRGBA5551 != NULL)
|
||||
{
|
||||
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
|
||||
{
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
@ -453,6 +473,9 @@ Render3DError Render3D::Reset()
|
|||
memset(this->clearImagePolyIDBuffer, 0, sizeof(this->clearImagePolyIDBuffer));
|
||||
memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer));
|
||||
|
||||
this->_willFlushFramebufferRGBA6665 = true;
|
||||
this->_willFlushFramebufferRGBA5551 = true;
|
||||
|
||||
TexCache_Reset();
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
@ -505,59 +528,118 @@ Render3DError Render3D::VramReconfigureSignal()
|
|||
|
||||
Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||
{
|
||||
if ( (dstRGBA6665 == NULL) && (dstRGBA5551 == NULL) )
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
const __m128i zero_vec128 = _mm_setzero_si128();
|
||||
|
||||
size_t i = 0;
|
||||
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
|
||||
for (; i < ssePixCount; i += 4)
|
||||
if ( (dstRGBA6665 != NULL) && (dstRGBA5551 != NULL) )
|
||||
{
|
||||
// Copy the framebufferColor buffer
|
||||
__m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + i));
|
||||
_mm_store_si128((__m128i *)(dstRGBA6665 + i), color);
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
// Copy the framebufferColor buffer
|
||||
__m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + i));
|
||||
_mm_store_si128((__m128i *)(dstRGBA6665 + i), color);
|
||||
|
||||
// Convert to RGBA5551
|
||||
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E)); // Read from R
|
||||
r = _mm_srli_epi32(r, 1); // Shift to R
|
||||
|
||||
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00)); // Read from G
|
||||
g = _mm_srli_epi32(g, 4); // Shift in G
|
||||
|
||||
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000)); // Read from B
|
||||
b = _mm_srli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_cmpeq_epi32(a, zero_vec128); // Determine A
|
||||
|
||||
// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
|
||||
// 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using
|
||||
// packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is
|
||||
// incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit
|
||||
// pack), we wouldn't have to go through this hassle. But not everyone has an
|
||||
// SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for
|
||||
// everyone's CPU.
|
||||
//
|
||||
// To use packssdw, we take a bit one position lower for the alpha bit, run
|
||||
// packssdw, then shift the bit back to its original position. Then we por the
|
||||
// alpha vector with the post-packed color vector to get the final color.
|
||||
|
||||
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A
|
||||
a = _mm_packs_epi32(a, zero_vec128); // Pack 32-bit down to 16-bit
|
||||
a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be
|
||||
|
||||
// Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in.
|
||||
color = _mm_or_si128(_mm_or_si128(r, g), b);
|
||||
color = _mm_packs_epi32(color, zero_vec128);
|
||||
color = _mm_or_si128(color, a);
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color);
|
||||
}
|
||||
|
||||
// Convert to RGBA5551
|
||||
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E)); // Read from R
|
||||
r = _mm_srli_epi32(r, 1); // Shift to R
|
||||
|
||||
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00)); // Read from G
|
||||
g = _mm_srli_epi32(g, 4); // Shift in G
|
||||
|
||||
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000)); // Read from B
|
||||
b = _mm_srli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_cmpeq_epi32(a, zero_vec128); // Determine A
|
||||
|
||||
// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
|
||||
// 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using
|
||||
// packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is
|
||||
// incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit
|
||||
// pack), we wouldn't have to go through this hassle. But not everyone has an
|
||||
// SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for
|
||||
// everyone's CPU.
|
||||
//
|
||||
// To use packssdw, we take a bit one position lower for the alpha bit, run
|
||||
// packssdw, then shift the bit back to its original position. Then we por the
|
||||
// alpha vector with the post-packed color vector to get the final color.
|
||||
|
||||
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A
|
||||
a = _mm_packs_epi32(a, zero_vec128); // Pack 32-bit down to 16-bit
|
||||
a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be
|
||||
|
||||
// Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in.
|
||||
color = _mm_or_si128(_mm_or_si128(r, g), b);
|
||||
color = _mm_packs_epi32(color, zero_vec128);
|
||||
color = _mm_or_si128(color, a);
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color);
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstRGBA6665[i] = this->_framebufferColor[i];
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
else if (dstRGBA6665 != NULL)
|
||||
{
|
||||
dstRGBA6665[i] = this->_framebufferColor[i];
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (; i < ssePixCount; i += 4)
|
||||
{
|
||||
// Convert to RGBA5551
|
||||
__m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + i));
|
||||
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E)); // Read from R
|
||||
r = _mm_srli_epi32(r, 1); // Shift to R
|
||||
|
||||
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00)); // Read from G
|
||||
g = _mm_srli_epi32(g, 4); // Shift in G
|
||||
|
||||
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000)); // Read from B
|
||||
b = _mm_srli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_cmpeq_epi32(a, zero_vec128); // Determine A
|
||||
|
||||
// From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned
|
||||
// 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using
|
||||
// packssdw on the alpha bit (0x8000) will result in a value of 0x7FFF, which is
|
||||
// incorrect. Now if we were to use SSE4.1's packusdw (unsigned saturated 16-bit
|
||||
// pack), we wouldn't have to go through this hassle. But not everyone has an
|
||||
// SSE4.1-capable CPU, so doing this the SSE2 way is more guaranteed to work for
|
||||
// everyone's CPU.
|
||||
//
|
||||
// To use packssdw, we take a bit one position lower for the alpha bit, run
|
||||
// packssdw, then shift the bit back to its original position. Then we por the
|
||||
// alpha vector with the post-packed color vector to get the final color.
|
||||
|
||||
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A
|
||||
a = _mm_packs_epi32(a, zero_vec128); // Pack 32-bit down to 16-bit
|
||||
a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be
|
||||
|
||||
// Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in.
|
||||
color = _mm_or_si128(_mm_or_si128(r, g), b);
|
||||
color = _mm_packs_epi32(color, zero_vec128);
|
||||
color = _mm_or_si128(color, a);
|
||||
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color);
|
||||
}
|
||||
|
||||
for (; i < pixCount; i++)
|
||||
{
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
|
|
@ -109,6 +109,9 @@ protected:
|
|||
size_t _framebufferColorSizeBytes;
|
||||
FragmentColor *_framebufferColor;
|
||||
|
||||
bool _willFlushFramebufferRGBA6665;
|
||||
bool _willFlushFramebufferRGBA5551;
|
||||
|
||||
CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u32 clearImageDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImageFogBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
|
@ -155,6 +158,9 @@ public:
|
|||
virtual Render3DError VramReconfigureSignal(); // Called when the emulator reconfigures its VRAM. You may need to invalidate your texture cache.
|
||||
|
||||
virtual Render3DError SetFramebufferSize(size_t w, size_t h); // Called whenever the output framebuffer size changes.
|
||||
|
||||
virtual void GetFramebufferFlushStates(bool &willFlushRGBA6665, bool &willFlushRGBA5551);
|
||||
virtual void SetFramebufferFlushStates(bool willFlushRGBA6665, bool willFlushRGBA5551);
|
||||
};
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
|
Loading…
Reference in New Issue